// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
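
/* ndo_get_stats64 callback: aggregate the per-ring packet and byte counters,
 * rereading each ring's counters under its u64_stats seqcount until a
 * consistent snapshot is obtained.
 */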
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	int ring;

	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				s->rx_packets += priv->rx[ring].rpackets;
				s->rx_bytes += priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				s->tx_packets += priv->tx[ring].pkt_done;
				s->tx_bytes += priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
		}
	}
}
static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}
static void gve_free_counter_array(struct gve_priv *priv)
{
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}
static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}
static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}
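
/* NAPI poll handler shared by TX and RX: service the rings owned by this
 * notification block, then ack and unmask the block's interrupt doorbell and
 * recheck for work that may have raced with the unmask.
 */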
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx)
		reschedule |= gve_tx_poll(block, budget);
	if (block->rx)
		reschedule |= gve_rx_poll(block, budget);

	if (reschedule)
		return budget;

	napi_complete(napi);
	irq_doorbell = gve_irq_doorbell(priv, block);
	iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

	/* Double check we have no extra work.
	 * Ensure unmask synchronizes with checking for work.
	 */
	dma_rmb();
	if (block->tx)
		reschedule |= gve_tx_poll(block, -1);
	if (block->rx)
		reschedule |= gve_rx_poll(block, -1);
	if (reschedule && napi_reschedule(napi))
		iowrite32be(GVE_IRQ_MASK, irq_doorbell);

	return 0;
}
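
/* Request one MSI-X vector per notification block plus one management vector.
 * If fewer vectors are granted, shrink the notification block count and the
 * TX/RX queue limits to match.
 */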
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	char *name = priv->dev->name;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvzalloc(num_vecs_requested *
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt",
		 name);
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->ntfy_blocks =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->ntfy_blocks),
				   &priv->ntfy_block_bus, GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}
	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d",
			 name, i);
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_intr, 0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}
static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	dma_free_coherent(&priv->pdev->dev,
			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
			  priv->ntfy_blocks, priv->ntfy_block_bus);
	priv->ntfy_blocks = NULL;
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->ntfy_block_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_ntfy_blocks;
	}
	gve_set_device_resources_ok(priv);
	return 0;
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);
	return err;
}
static void gve_trigger_reset(struct gve_priv *priv);
static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}
	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_clear_device_resources_ok(priv);
}
static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
		       NAPI_POLL_WEIGHT);
}
static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}
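
/* Register every queue page list with the device over the admin queue. A
 * failure here triggers a device reset, so no unwinding is done locally.
 */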
static int gve_register_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}
static int gve_unregister_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int err;
	int i;

	for (i = 0; i < num_qpls; i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}
static int gve_create_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_create_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_create_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		/* Rx data ring has been prefilled with packet buffers at
		 * queue allocation time.
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		gve_rx_write_doorbell(priv, &priv->rx[i]);
		netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
	}

	return 0;
}
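
/* Allocate the TX and RX ring arrays and their rings, then attach a NAPI
 * context and initialize the stats seqcount for each ring.
 */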
static int gve_alloc_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int err;
	int i;

	/* Setup tx rings */
	priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;
	err = gve_tx_alloc_rings(priv);
	if (err)
		goto free_tx;
	/* Setup rx rings */
	priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}
	err = gve_rx_alloc_rings(priv);
	if (err)
		goto free_rx;
	/* Add tx napi & init sync stats*/
	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		u64_stats_init(&priv->tx[i].statss);
		ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}
	/* Add rx napi & init sync stats*/
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		u64_stats_init(&priv->rx[i].statss);
		ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
		gve_add_napi(priv, ntfy_idx);
	}

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv);
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}
static int gve_destroy_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_tx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy tx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
	}
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_adminq_destroy_rx_queue(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to destroy rx queue %d\n",
				  i);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
		netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
	}
	return 0;
}
static void gve_free_rings(struct gve_priv *priv)
{
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < priv->tx_cfg.num_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}
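
/* Allocate a single page and DMA-map it for device access; the queue page
 * lists handed to the NIC are built from pages allocated here.
 */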
int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir)
{
	*page = alloc_page(GFP_KERNEL);
	if (!*page)
		return -ENOMEM;
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}
static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
				   GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id));
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}
void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}
static void gve_free_queue_page_list(struct gve_priv *priv,
				     int id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
free_pages:
	kvfree(qpl->pages);
	priv->num_registered_pages -= qpl->num_entries;
}
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i, j;
	int err;

	priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	for (i = 0; i < gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}
	for (; i < num_qpls; i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	return err;
}
static void gve_free_qpls(struct gve_priv *priv)
{
	int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
	int i;

	kvfree(priv->qpl_cfg.qpl_id_map);

	for (i = 0; i < num_qpls; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
}
/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);
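
/* ndo_open: allocate queue page lists and rings, register and create the
 * device queues, then enable NAPI and the TX queues. On failure the device
 * is reset unless a reset is already in progress.
 */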
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;
	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;
	err = gve_create_rings(priv);
	if (err)
		goto reset;
	gve_set_device_rings_ok(priv);

	gve_turnup(priv);
	netif_carrier_on(dev);
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}
static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}

	gve_free_rings(priv);
	gve_free_qpls(priv);
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}
int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
}
static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		iowrite32be(0, gve_irq_doorbell(priv, block));
	}

	gve_set_napi_enabled(priv);
}
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_priv *priv = netdev_priv(dev);

	gve_schedule_reset(priv);
	priv->tx_timeo_cnt++;
}
static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		= gve_tx,
	.ndo_open		= gve_open,
	.ndo_stop		= gve_close,
	.ndo_get_stats64	= gve_get_stats,
	.ndo_tx_timeout		= gve_tx_timeout,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
}
static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}
/* Handle NIC status register changes and reset requests */
static void gve_service_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     service_task);

	gve_handle_status(priv,
			  ioread32be(&priv->reg_bar0->device_status));

	gve_handle_reset(priv);
}
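
/* One-time (and post-reset) device initialization: bring up the admin queue,
 * query the device description, size the notification block and queue
 * configuration from the available MSI-X vectors, then set up device
 * resources.
 */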
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
{
	int num_ntfy;
	int err;

	/* Set up the adminq */
	err = gve_adminq_alloc(&priv->pdev->dev, priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Failed to alloc admin queue: err=%d\n", err);
		return err;
	}

	if (skip_describe_device)
		goto setup_device;

	/* Get the initial information we need from the device */
	err = gve_adminq_describe_device(priv);
	if (err) {
		dev_err(&priv->pdev->dev,
			"Could not get device information: err=%d\n", err);
		goto err;
	}
	if (priv->dev->max_mtu > PAGE_SIZE) {
		priv->dev->max_mtu = PAGE_SIZE;
		err = gve_adminq_set_mtu(priv, priv->dev->mtu);
		if (err) {
			netif_err(priv, drv, priv->dev, "Could not set mtu");
			goto err;
		}
	}
	priv->dev->mtu = priv->dev->max_mtu;
	num_ntfy = pci_msix_vec_count(priv->pdev);
	if (num_ntfy <= 0) {
		dev_err(&priv->pdev->dev,
			"could not count MSI-x vectors: err=%d\n", num_ntfy);
		err = num_ntfy;
		goto err;
	} else if (num_ntfy < GVE_MIN_MSIX) {
		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
			GVE_MIN_MSIX, num_ntfy);
		err = -EINVAL;
		goto err;
	}

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
	/* gvnic has one Notification Block per MSI-x vector, except for the
	 * management vector
	 */
	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
	priv->mgmt_msix_idx = priv->num_ntfy_blks;

	priv->tx_cfg.max_queues =
		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
	priv->rx_cfg.max_queues =
		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);

	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	if (priv->default_num_queues > 0) {
		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->tx_cfg.num_queues);
		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
						priv->rx_cfg.num_queues);
	}

	netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
		   priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
	netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
		   priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);

setup_device:
	err = gve_setup_device_resources(priv);
	if (!err)
		return 0;
err:
	gve_adminq_free(&priv->pdev->dev, priv);
	return err;
}
static void gve_teardown_priv_resources(struct gve_priv *priv)
{
	gve_teardown_device_resources(priv);
	gve_adminq_free(&priv->pdev->dev, priv);
}
static void gve_trigger_reset(struct gve_priv *priv)
{
	/* Reset the device by releasing the AQ */
	gve_adminq_release(priv);
}
static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
{
	gve_trigger_reset(priv);
	/* With the reset having already happened, close cannot fail */
	if (was_up)
		gve_close(priv->dev);
	gve_teardown_priv_resources(priv);
}
static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
{
	int err;

	err = gve_init_priv(priv, true);
	if (err)
		goto err;
	if (was_up) {
		err = gve_open(priv->dev);
		if (err)
			goto err;
	}
	return 0;
err:
	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}
int gve_reset(struct gve_priv *priv, bool attempt_teardown)
{
	bool was_up = netif_carrier_ok(priv->dev);
	int err;

	dev_info(&priv->pdev->dev, "Performing reset\n");
	gve_clear_do_reset(priv);
	gve_set_reset_in_progress(priv);
	/* If we aren't attempting to teardown normally, just go turndown and
	 * reset right away.
	 */
	if (!attempt_teardown) {
		gve_turndown(priv);
		gve_reset_and_teardown(priv, was_up);
	} else {
		/* Otherwise attempt to close normally */
		if (was_up) {
			err = gve_close(priv->dev);
			/* If that fails reset as we did above */
			if (err)
				gve_reset_and_teardown(priv, was_up);
		}
		/* Clean up any remaining resources */
		gve_teardown_priv_resources(priv);
	}

	/* Set it all back up */
	err = gve_reset_recovery(priv, was_up);
	gve_clear_reset_in_progress(priv);
	return err;
}
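
/* Report the driver version to the device by writing the prefixed version
 * string ("GVE-1.0.0") one byte at a time to the driver_version register.
 */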
static void gve_write_version(u8 __iomem *driver_version_register)
{
	const char *c = gve_version_prefix;

	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}

	c = gve_version_str;
	while (*c) {
		writeb(*c, driver_version_register);
		c++;
	}
	writeb('\n', driver_version_register);
}
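
/* PCI probe: enable the device, map the register and doorbell BARs, allocate
 * the netdev and private state, set up the service workqueue, initialize the
 * device, and register the netdev.
 */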
static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int max_tx_queues, max_rx_queues;
	struct net_device *dev;
	__be32 __iomem *db_bar;
	struct gve_registers __iomem *reg_bar;
	struct gve_priv *priv;
	int err;

	err = pci_enable_device(pdev);
	if (err)
		return -ENXIO;

	err = pci_request_regions(pdev, "gvnic-cfg");
	if (err)
		goto abort_with_enabled;

	pci_set_master(pdev);

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		dev_err(&pdev->dev,
			"Failed to set consistent dma mask: err=%d\n", err);
		goto abort_with_pci_region;
	}

	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
	if (!reg_bar) {
		dev_err(&pdev->dev, "Failed to map pci bar!\n");
		err = -ENOMEM;
		goto abort_with_pci_region;
	}

	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
	if (!db_bar) {
		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
		err = -ENOMEM;
		goto abort_with_reg_bar;
	}

	gve_write_version(&reg_bar->driver_version);
	/* Get max queues to alloc etherdev */
	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
	/* Alloc and setup the netdev and priv */
	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
	if (!dev) {
		dev_err(&pdev->dev, "could not allocate netdev\n");
		goto abort_with_db_bar;
	}
	SET_NETDEV_DEV(dev, &pdev->dev);
	pci_set_drvdata(pdev, dev);
	dev->ethtool_ops = &gve_ethtool_ops;
	dev->netdev_ops = &gve_netdev_ops;
	/* advertise features */
	dev->hw_features = NETIF_F_HIGHDMA;
	dev->hw_features |= NETIF_F_SG;
	dev->hw_features |= NETIF_F_HW_CSUM;
	dev->hw_features |= NETIF_F_TSO;
	dev->hw_features |= NETIF_F_TSO6;
	dev->hw_features |= NETIF_F_TSO_ECN;
	dev->hw_features |= NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_RXHASH;
	dev->features = dev->hw_features;
	dev->watchdog_timeo = 5 * HZ;
	dev->min_mtu = ETH_MIN_MTU;
	netif_carrier_off(dev);

	priv = netdev_priv(dev);
	priv->dev = dev;
	priv->pdev = pdev;
	priv->msg_enable = DEFAULT_MSG_LEVEL;
	priv->reg_bar0 = reg_bar;
	priv->db_bar2 = db_bar;
	priv->service_task_flags = 0x0;
	priv->state_flags = 0x0;

	gve_set_probe_in_progress(priv);
	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue");
		err = -ENOMEM;
		goto abort_with_netdev;
	}
	INIT_WORK(&priv->service_task, gve_service_task);
	priv->tx_cfg.max_queues = max_tx_queues;
	priv->rx_cfg.max_queues = max_rx_queues;

	err = gve_init_priv(priv, false);
	if (err)
		goto abort_with_wq;

	err = register_netdev(dev);
	if (err)
		goto abort_with_wq;

	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
	gve_clear_probe_in_progress(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	return 0;

abort_with_wq:
	destroy_workqueue(priv->gve_wq);

abort_with_netdev:
	free_netdev(dev);

abort_with_db_bar:
	pci_iounmap(pdev, db_bar);

abort_with_reg_bar:
	pci_iounmap(pdev, reg_bar);

abort_with_pci_region:
	pci_release_regions(pdev);

abort_with_enabled:
	pci_disable_device(pdev);
	return -ENXIO;
}
static void gve_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct gve_priv *priv = netdev_priv(netdev);
	__be32 __iomem *db_bar = priv->db_bar2;
	void __iomem *reg_bar = priv->reg_bar0;

	unregister_netdev(netdev);
	gve_teardown_priv_resources(priv);
	destroy_workqueue(priv->gve_wq);
	free_netdev(netdev);
	pci_iounmap(pdev, db_bar);
	pci_iounmap(pdev, reg_bar);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}
static const struct pci_device_id gve_id_table[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
	{ }
};

static struct pci_driver gvnic_driver = {
	.name		= "gvnic",
	.id_table	= gve_id_table,
	.probe		= gve_probe,
	.remove		= gve_remove,
};

module_pci_driver(gvnic_driver);

MODULE_DEVICE_TABLE(pci, gve_id_table);
MODULE_AUTHOR("Google, Inc.");
MODULE_DESCRIPTION("gVNIC Driver");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_VERSION(GVE_VERSION);