// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"
#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	int ring;

	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				s->rx_packets += priv->rx[ring].rpackets;
				s->rx_bytes += priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				s->tx_packets += priv->tx[ring].pkt_done;
				s->tx_bytes += priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
		}
	}
}
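
/* The counter array is a DMA-coherent region the device fills with per-queue
 * event counters; its bus address is handed to the device later in
 * gve_setup_device_resources().
 */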
static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}
static void gve_free_counter_array(struct gve_priv *priv)
{
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}
/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}
static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}
static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}
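
/* The stats report is a DMA-coherent buffer sized with struct_size() to hold
 * both driver- and NIC-reported stats for every tx and rx queue; a timer
 * periodically reschedules the report-stats work while the device wants
 * stats reported.
 */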
static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       priv->tx_cfg.num_queues;
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     tx_stats_num + rx_stats_num);
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}
static void gve_free_stats_report(struct gve_priv *priv)
{
	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}
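
/* The management interrupt only signals asynchronous device events (reset
 * requests, link and report-stats status changes), so its handler just kicks
 * the service task instead of doing work in hard-irq context.
 */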
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}
static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}
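
/* Per-queue interrupts are masked in gve_intr() above and only re-enabled
 * from napi context below, via a write to the block's irq doorbell, once the
 * tx/rx work has been drained.
 */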
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Double check we have no extra work.
	 * Ensure unmask synchronizes with checking for work.
	 */
	dma_rmb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}
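
/* One MSI-X vector is requested per notification block plus one for
 * management. If fewer vectors are granted, the block count is rounded down
 * to an even number and the tx/rx max queue counts are shrunk to match
 * before the irqs are requested and spread across the online CPUs.
 */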
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	char *name = priv->dev->name;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvzalloc(num_vecs_requested *
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
		 name);
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->ntfy_blocks =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->ntfy_blocks),
				   &priv->ntfy_block_bus, GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}
	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
			 name, i);
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_intr, 0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}
static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}
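
/* Device resources (counter array, notification blocks, stats report) are
 * allocated first and then described to the device over the admin queue;
 * teardown tells the device before freeing them.
 */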
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (err) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		goto abort_with_stats_report;
	}
	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
	return err;
}
static void gve_trigger_reset(struct gve_priv *priv);
static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
		       NAPI_POLL_WEIGHT);
}
static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}
static int gve_register_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}
static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  priv->tx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  priv->tx_cfg.num_queues);
	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	/* Rx data ring has been prefilled with packet buffers at queue
	 * allocation time.
	 * Write the doorbell to provide descriptor slots and packet buffers
	 * to the NIC.
	 */
	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_write_doorbell(priv, &priv->rx[i]);

	return 0;
}
static int gve_alloc_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int err;
	int i;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;
	err = gve_tx_alloc_rings(priv);
	if (err)
		goto free_tx;
	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}
	err = gve_rx_alloc_rings(priv);
	if (err)
		goto free_rx;
	/* Add tx napi & init sync stats*/
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		u64_stats_init(&priv->tx[i].statss);
		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		u64_stats_init(&priv->rx[i].statss);
		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}
static int gve_destroy_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_destroy_tx_queues(priv, priv->tx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}
static void gve_free_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}
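
/* Queue page list (QPL) page helpers: each registered page is allocated and
 * DMA-mapped individually, with allocation and mapping failures counted in
 * the priv stats.
 */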
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir)
{
	*page = alloc_page(GFP_KERNEL);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}
static void gve_free_queue_page_list(struct gve_priv *priv,
				     u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
free_pages:
	kvfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	/* Raw addressing means no QPLs */
	if (priv->raw_addressing)
		return 0;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	return err;
}
static void gve_free_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i;

	/* Raw addressing means no QPLs */
	if (priv->raw_addressing)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);

	for (i = 0; i < num_qpls; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
}
/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
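
/* Bring-up order in gve_open(): allocate QPLs and rings, size the real
 * queue counts, register the page lists and create the queues on the device,
 * then turn the napi/tx path up. Failures after the device has seen the
 * rings fall through to a reset.
 */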
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;
	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;
	err = gve_create_rings(priv);
	if (err)
		goto reset;
	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
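
/* Turndown/turnup quiesce and restart the data path: carrier off, napi
 * disabled and tx queues stopped on the way down; tx queues started, napi
 * enabled and interrupts unmasked (doorbell write of 0) on the way up.
 */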
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}

	gve_set_napi_enabled(priv);
}
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_priv *priv = netdev_priv(dev);

	gve_schedule_reset(priv);
	priv->tx_timeo_cnt++;
}
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_tx,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout		=	gve_tx_timeout,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}
void gve_handle_report_stats(struct gve_priv *priv)
{
	int idx, stats_idx = 0, tx_bytes;
	unsigned int start = 0;
	struct stats *stats = priv->stats_report->stats;

	if (!gve_get_report_stats(priv))
		return;

	be64_add_cpu(&priv->stats_report->written_count, 1);
	/* tx stats */
	if (priv->tx) {
		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
			do {
				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
				tx_bytes = priv->tx[idx].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_WAKE_CNT),
				.value = cpu_to_be64(priv->tx[idx].wake_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_STOP_CNT),
				.value = cpu_to_be64(priv->tx[idx].stop_queue),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
				.value = cpu_to_be64(priv->tx[idx].req),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_BYTES_SENT),
				.value = cpu_to_be64(tx_bytes),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
				.value = cpu_to_be64(priv->tx[idx].done),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
	/* rx stats */
	if (priv->rx) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
				.queue_id = cpu_to_be32(idx),
			};
			stats[stats_idx++] = (struct stats) {
				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[0].fill_cnt),
				.queue_id = cpu_to_be32(idx),
			};
		}
	}
}
static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}
/* Handle NIC status register changes, reset requests and report stats */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);
	u32 status = ioread32be(&priv->reg_bar0->device_status);

	gve_handle_status(priv, status);

	gve_handle_reset(priv);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	priv->raw_addressing = false;
	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	if (priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			dev_err(&priv->pdev->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}
static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	priv->reset_cnt++;
	priv->interface_up_cnt = 0;
	priv->interface_down_cnt = 0;
	priv->stats_report_trigger_cnt = 0;
	return err;
}
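
/* The driver version is reported to the device by writing the prefix and
 * version strings one byte at a time to the driver_version register,
 * terminated with a newline.
 */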
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}
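
/* Probe maps the register and doorbell BARs, reads the max queue counts to
 * size the netdev, advertises offload features, and then runs the shared
 * gve_init_priv() path before registering the netdev.
 */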
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return -ENXIO;

	err = pci_request_regions(pdev, "gvnic-cfg");
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev,
			"Failed to set consistent dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_rx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_tx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	/* advertise features */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;
	priv->ethtool_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_wq;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return -ENXIO;
}
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);