// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)

#include <linux/bpf.h>
#include <linux/crash_dump.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/idr.h>
#include <linux/if_vlan.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>

#include "funeth.h"
#include "funeth_devlink.h"
#include "funeth_ktls.h"
#include "fun_port.h"
#include "fun_queue.h"
#include "funeth_txrx.h"

#define ADMIN_SQ_DEPTH 32
#define ADMIN_CQ_DEPTH 64
#define ADMIN_RQ_DEPTH 16

/* Default number of Tx/Rx queues. */
#define FUN_DFLT_QUEUES 16U

enum {
	FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL,
	FUN_SERV_DEL_PORTS,
};

static const struct pci_device_id funeth_id_table[] = {
	{ PCI_VDEVICE(FUNGIBLE, 0x0101) },
	{ PCI_VDEVICE(FUNGIBLE, 0x0181) },
	{ 0, }
};

/* Issue a port write admin command with @n key/value pairs. */
static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n,
			       const int *keys, const u64 *data)
{
	unsigned int cmd_size, i;
	union {
		struct fun_admin_port_req req;
		struct fun_admin_port_rsp rsp;
		u8 v[ADMIN_RSP_MAX_LEN];
	} cmd;

	cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) +
		   n * sizeof(struct fun_admin_write48_req);
	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
		return -EINVAL;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
						    cmd_size);
	cmd.req.u.write =
		FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0,
					      fp->netdev->dev_port);
	for (i = 0; i < n; i++)
		cmd.req.u.write.write48[i] =
			FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]);

	return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
					 &cmd.rsp, cmd_size, 0);
}

int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data)
{
	return fun_port_write_cmds(fp, 1, &key, &data);
}

/* Issue a port read admin command with @n key/value pairs. */
static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
			      const int *keys, u64 *data)
{
	const struct fun_admin_read48_rsp *r48rsp;
	unsigned int cmd_size, i;
	int rc;
	union {
		struct fun_admin_port_req req;
		struct fun_admin_port_rsp rsp;
		u8 v[ADMIN_RSP_MAX_LEN];
	} cmd;

	cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) +
		   n * sizeof(struct fun_admin_read48_req);
	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
		return -EINVAL;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
						    cmd_size);
	cmd.req.u.read =
		FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0,
					     fp->netdev->dev_port);
	for (i = 0; i < n; i++)
		cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]);

	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
				       &cmd.rsp, cmd_size, 0);
	if (rc)
		return rc;

	for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) {
		data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data);
		dev_dbg(fp->fdev->dev,
			"port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld",
			fp->lport, r48rsp->key_to_data, keys[i], data[i],
			FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data));
	}
	return 0;
}

int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data)
{
	return fun_port_read_cmds(fp, 1, &key, data);
}

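/* Log the port's link state: speed, FEC mode, and pause settings when the
 * carrier is up, or a plain link-down message otherwise.
 */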
static void fun_report_link(struct net_device *netdev)
{
	if (netif_carrier_ok(netdev)) {
		const struct funeth_priv *fp = netdev_priv(netdev);
		const char *fec = "", *pause = "";
		int speed = fp->link_speed;
		char unit = 'M';

		if (fp->link_speed >= SPEED_1000) {
			speed /= 1000;
			unit = 'G';
		}

		if (fp->active_fec & FUN_PORT_FEC_RS)
			fec = ", RS-FEC";
		else if (fp->active_fec & FUN_PORT_FEC_FC)
			fec = ", BASER-FEC";

		if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK)
			pause = ", Tx/Rx PAUSE";
		else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE)
			pause = ", Rx PAUSE";
		else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE)
			pause = ", Tx PAUSE";

		netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n",
			    speed, unit, pause, fec);
	} else {
		netdev_info(netdev, "Link down\n");
	}
}

static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr,
			 unsigned int adi_id, const struct fun_adi_param *param)
{
	struct fun_admin_adi_req req = {
		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI,
						     sizeof(req)),
		.u.write.subop = FUN_ADMIN_SUBOP_WRITE,
		.u.write.attribute = attr,
		.u.write.id = cpu_to_be32(adi_id),
		.u.write.param = *param
	};

	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
}

/* Configure RSS for the given port. @op determines whether a new RSS context
 * is to be created or whether an existing one should be reconfigured. The
 * remaining parameters specify the hashing algorithm, key, and indirection
 * table.
 *
 * This initiates packet delivery to the Rx queues set in the indirection
 * table.
 */
int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
		   const u32 *qtable, u8 op)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned int table_len = fp->indir_table_nentries;
	unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len;
	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
	union {
		struct {
			struct fun_admin_rss_req req;
			struct fun_dataop_gl gl;
		};
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	__be32 *indir_tab;
	u32 flags;
	int rc;

	if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID)
		return -EINVAL;

	flags = op == FUN_ADMIN_SUBOP_CREATE ?
			FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0;
	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS,
						    sizeof(cmd));
	cmd.req.u.create =
		FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id,
					      dev->dev_port, algo,
					      FUN_ETH_RSS_MAX_KEY_SIZE,
					      table_len, 0,
					      FUN_ETH_RSS_MAX_KEY_SIZE);
	cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
	fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr);

	/* write the key and indirection table into the RSS DMA area */
	memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE);
	indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE;
	for (rc = 0; rc < table_len; rc++)
		*indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid);

	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
				       &cmd.rsp, sizeof(cmd.rsp), 0);
	if (!rc && op == FUN_ADMIN_SUBOP_CREATE)
		fp->rss_hw_id = be32_to_cpu(cmd.rsp.id);

	return rc;
}

/* Destroy the HW RSS context associated with the given port. This also stops
 * all packet delivery to our Rx queues.
 */
static void fun_destroy_rss(struct funeth_priv *fp)
{
	if (fp->rss_hw_id != FUN_HCI_ID_INVALID) {
		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id);
		fp->rss_hw_id = FUN_HCI_ID_INVALID;
	}
}

static void fun_irq_aff_notify(struct irq_affinity_notify *notify,
			       const cpumask_t *mask)
{
	struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify);

	cpumask_copy(&p->affinity_mask, mask);
}

static void fun_irq_aff_release(struct kref __always_unused *ref)
{
}

/* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it,
 * and add it to the IRQ XArray.
 */
static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx,
				      int node, unsigned int xa_idx_offset)
{
	struct fun_irq *irq;
	int cpu, res;

	cpu = cpumask_local_spread(idx, node);
	node = cpu_to_mem(cpu);

	irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node);
	if (!irq)
		return ERR_PTR(-ENOMEM);

	res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx);
	if (res != 1)
		goto free_irq;

	res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL);
	if (res)
		goto release_irq;

	irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx);
	cpumask_set_cpu(cpu, &irq->affinity_mask);
	irq->aff_notify.notify = fun_irq_aff_notify;
	irq->aff_notify.release = fun_irq_aff_release;
	irq->state = FUN_IRQ_INIT;
	return irq;

release_irq:
	fun_release_irqs(fp->fdev, 1, &irq->irq_idx);
free_irq:
	kfree(irq);
	return ERR_PTR(res);
}

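/* Undo fun_alloc_qirq(): remove the NAPI instance and return the MSI-X vector. */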
static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq)
{
	netif_napi_del(&irq->napi);
	fun_release_irqs(fp->fdev, 1, &irq->irq_idx);
	kfree(irq);
}

/* Release the IRQs reserved for Tx/Rx queues that aren't being used. */
static void fun_prune_queue_irqs(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned int nreleased = 0;
	struct fun_irq *irq;
	unsigned long idx;

	xa_for_each(&fp->irqs, idx, irq) {
		if (irq->txq || irq->rxq) /* skip those in use */
			continue;

		xa_erase(&fp->irqs, idx);
		fun_free_qirq(fp, irq);
		nreleased++;
		if (idx < fp->rx_irq_ofst)
			fp->num_tx_irqs--;
		else
			fp->num_rx_irqs--;
	}
	netif_info(fp, intr, dev, "Released %u queue IRQs\n", nreleased);
}

/* Reserve IRQs, one per queue, to accommodate the requested queue numbers @ntx
 * and @nrx. IRQs are added incrementally to those we already have.
 * We hold on to allocated IRQs until garbage collection of unused IRQs is
 * separately requested.
 */
static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx,
				unsigned int nrx)
{
	struct funeth_priv *fp = netdev_priv(dev);
	int node = dev_to_node(&fp->pdev->dev);
	struct fun_irq *irq;
	unsigned int i;

	for (i = fp->num_tx_irqs; i < ntx; i++) {
		irq = fun_alloc_qirq(fp, i, node, 0);
		if (IS_ERR(irq))
			return PTR_ERR(irq);

		fp->num_tx_irqs++;
		netif_napi_add_tx(dev, &irq->napi, fun_txq_napi_poll);
	}

	for (i = fp->num_rx_irqs; i < nrx; i++) {
		irq = fun_alloc_qirq(fp, i, node, fp->rx_irq_ofst);
		if (IS_ERR(irq))
			return PTR_ERR(irq);

		fp->num_rx_irqs++;
		netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll);
	}

	netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n",
		   ntx, nrx);
	return 0;
}

static void free_txqs(struct funeth_txq **txqs, unsigned int nqs,
		      unsigned int start, int state)
{
	unsigned int i;

	for (i = start; i < nqs && txqs[i]; i++)
		txqs[i] = funeth_txq_free(txqs[i], state);
}

static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs,
		      unsigned int nqs, unsigned int depth, unsigned int start,
		      int state)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned int i;
	int err;

	for (i = start; i < nqs; i++) {
		err = funeth_txq_create(dev, i, depth, xa_load(&fp->irqs, i),
					state, &txqs[i]);
		if (err) {
			free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED);
			return err;
		}
	}
	return 0;
}

static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs,
		      unsigned int start, int state)
{
	unsigned int i;

	for (i = start; i < nqs && rxqs[i]; i++)
		rxqs[i] = funeth_rxq_free(rxqs[i], state);
}

static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs,
		      unsigned int nqs, unsigned int ncqe, unsigned int nrqe,
		      unsigned int start, int state)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned int i;
	int err;

	for (i = start; i < nqs; i++) {
		err = funeth_rxq_create(dev, i, ncqe, nrqe,
					xa_load(&fp->irqs, i + fp->rx_irq_ofst),
					state, &rxqs[i]);
		if (err) {
			free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED);
			return err;
		}
	}
	return 0;
}

static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs,
		       unsigned int start, int state)
{
	unsigned int i;

	for (i = start; i < nqs && xdpqs[i]; i++)
		xdpqs[i] = funeth_txq_free(xdpqs[i], state);

	if (state == FUN_QSTATE_DESTROYED)
		kfree(xdpqs);
}

static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs,
				       unsigned int depth, unsigned int start,
				       int state)
{
	struct funeth_txq **xdpqs;
	unsigned int i;
	int err;

	xdpqs = kcalloc(nqs, sizeof(*xdpqs), GFP_KERNEL);
	if (!xdpqs)
		return ERR_PTR(-ENOMEM);

	for (i = start; i < nqs; i++) {
		err = funeth_txq_create(dev, i, depth, NULL, state, &xdpqs[i]);
		if (err) {
			free_xdpqs(xdpqs, nqs, start, FUN_QSTATE_DESTROYED);
			return ERR_PTR(err);
		}
	}
	return xdpqs;
}

static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	struct funeth_txq **xdpqs = qset->xdpqs;
	struct funeth_rxq **rxqs = qset->rxqs;

	/* qset may not specify any queues to operate on. In that case the
	 * currently installed queues are implied.
	 */
	if (!rxqs) {
		rxqs = rtnl_dereference(fp->rxqs);
		xdpqs = rtnl_dereference(fp->xdpqs);
		qset->txqs = fp->txqs;
		qset->nrxqs = netdev->real_num_rx_queues;
		qset->ntxqs = netdev->real_num_tx_queues;
		qset->nxdpqs = fp->num_xdpqs;
	}
	if (!rxqs)
		return;

	if (rxqs == rtnl_dereference(fp->rxqs)) {
		rcu_assign_pointer(fp->rxqs, NULL);
		rcu_assign_pointer(fp->xdpqs, NULL);
		synchronize_net();
		fp->txqs = NULL;
	}

	free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state);
	free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state);
	free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state);
	if (qset->state == FUN_QSTATE_DESTROYED)
		kfree(rxqs);

	/* Tell the caller which queues were operated on. */
	qset->rxqs = rxqs;
	qset->xdpqs = xdpqs;
}

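/* Allocate the IRQs and queue structures described by @qset and take the new
 * queues to the state requested in qset->state. On success the queues are
 * returned in @qset.
 */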
static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset)
{
	struct funeth_txq **xdpqs = NULL, **txqs;
	struct funeth_rxq **rxqs;
	int err;

	err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs);
	if (err)
		return err;

	rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL);
	if (!rxqs)
		return -ENOMEM;

	if (qset->nxdpqs) {
		xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth,
				    qset->xdpq_start, qset->state);
		if (IS_ERR(xdpqs)) {
			err = PTR_ERR(xdpqs);
			goto free_qvec;
		}
	}

	txqs = (struct funeth_txq **)&rxqs[qset->nrxqs];
	err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth,
			 qset->txq_start, qset->state);
	if (err)
		goto free_xdpqs;

	err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth,
			 qset->rq_depth, qset->rxq_start, qset->state);
	if (err)
		goto free_txqs;

	qset->rxqs = rxqs;
	qset->txqs = txqs;
	qset->xdpqs = xdpqs;
	return 0;

free_txqs:
	free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED);
free_xdpqs:
	free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED);
free_qvec:
	kfree(rxqs);
	return err;
}

/* Take queues to the next level. Presently this means creating them on the
 * device.
 */
static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset)
{
	struct funeth_priv *fp = netdev_priv(dev);
	int i, err;

	for (i = 0; i < qset->nrxqs; i++) {
		err = fun_rxq_create_dev(qset->rxqs[i],
					 xa_load(&fp->irqs,
						 i + fp->rx_irq_ofst));
		if (err)
			goto out;
	}

	for (i = 0; i < qset->ntxqs; i++) {
		err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i));
		if (err)
			goto out;
	}

	for (i = 0; i < qset->nxdpqs; i++) {
		err = fun_txq_create_dev(qset->xdpqs[i], NULL);
		if (err)
			goto out;
	}

	return 0;

out:
	fun_free_rings(dev, qset);
	return err;
}

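/* Create the device port backing this netdev, if it doesn't exist yet, and
 * record the logical port (lport) assigned by the device.
 */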
static int fun_port_create(struct net_device *netdev)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	union {
		struct fun_admin_port_req req;
		struct fun_admin_port_rsp rsp;
	} cmd;
	int rc;

	if (fp->lport != INVALID_LPORT)
		return 0;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
						    sizeof(cmd.req));
	cmd.req.u.create =
		FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
					       netdev->dev_port);

	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
				       sizeof(cmd.rsp), 0);
	if (rc)
		return rc;

	fp->lport = be16_to_cpu(cmd.rsp.u.create.lport);
	return 0;
}

static int fun_port_destroy(struct net_device *netdev)
{
	struct funeth_priv *fp = netdev_priv(netdev);

	if (fp->lport == INVALID_LPORT)
		return 0;

	fp->lport = INVALID_LPORT;
	return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0,
			       netdev->dev_port);
}

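/* Create an ETH Tx flow resource for this port. Returns the new resource's
 * HW id on success or a negative error.
 */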
static int fun_eth_create(struct funeth_priv *fp)
{
	union {
		struct fun_admin_eth_req req;
		struct fun_admin_generic_create_rsp rsp;
	} cmd;
	int rc;

	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH,
						    sizeof(cmd.req));
	cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT(
				FUN_ADMIN_SUBOP_CREATE,
				FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR,
				0, fp->netdev->dev_port);

	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
				       sizeof(cmd.rsp), 0);
	return rc ? rc : be32_to_cpu(cmd.rsp.id);
}

static int fun_vi_create(struct funeth_priv *fp)
{
	struct fun_admin_vi_req req = {
		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI,
						     sizeof(req)),
		.u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE,
							 0,
							 fp->netdev->dev_port,
							 fp->netdev->dev_port)
	};

	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
}

/* Helper to create an ETH flow and bind an SQ to it.
 * Returns the ETH id (>= 0) on success or a negative error.
 */
int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid)
{
	int rc, ethid;

	ethid = fun_eth_create(fp);
	if (ethid >= 0) {
		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
			      FUN_ADMIN_BIND_TYPE_ETH, ethid);
		if (rc) {
			fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid);
			ethid = rc;
		}
	}
	return ethid;
}

static irqreturn_t fun_queue_irq_handler(int irq, void *data)
{
	struct fun_irq *p = data;

	if (p->rxq)
		prefetch(p->rxq->next_cqe_info);

	napi_schedule_irqoff(&p->napi);
	return IRQ_HANDLED;
}

static int fun_enable_irqs(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned long idx, last;
	unsigned int qidx;
	struct fun_irq *p;
	const char *qtype;
	int err;

	xa_for_each(&fp->irqs, idx, p) {
		if (p->txq) {
			qtype = "tx";
			qidx = p->txq->qidx;
		} else if (p->rxq) {
			qtype = "rx";
			qidx = p->rxq->qidx;
		} else {
			continue;
		}

		if (p->state != FUN_IRQ_INIT)
			continue;

		snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name,
			 qtype, qidx);
		err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p);
		if (err) {
			netdev_err(dev, "Failed to allocate IRQ %u, err %d\n",
				   p->irq, err);
			goto unroll;
		}
		p->state = FUN_IRQ_REQUESTED;
	}

	xa_for_each(&fp->irqs, idx, p) {
		if (p->state != FUN_IRQ_REQUESTED)
			continue;
		irq_set_affinity_notifier(p->irq, &p->aff_notify);
		irq_set_affinity_and_hint(p->irq, &p->affinity_mask);
		napi_enable(&p->napi);
		p->state = FUN_IRQ_ENABLED;
	}

	return 0;

unroll:
	last = idx - 1;
	xa_for_each_range(&fp->irqs, idx, p, 0, last)
		if (p->state == FUN_IRQ_REQUESTED) {
			free_irq(p->irq, p);
			p->state = FUN_IRQ_INIT;
		}

	return err;
}

static void fun_disable_one_irq(struct fun_irq *irq)
{
	napi_disable(&irq->napi);
	irq_set_affinity_notifier(irq->irq, NULL);
	irq_update_affinity_hint(irq->irq, NULL);
	free_irq(irq->irq, irq);
	irq->state = FUN_IRQ_INIT;
}

static void fun_disable_irqs(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_irq *p;
	unsigned long idx;

	xa_for_each(&fp->irqs, idx, p)
		if (p->state == FUN_IRQ_ENABLED)
			fun_disable_one_irq(p);
}

static void fun_down(struct net_device *dev, struct fun_qset *qset)
{
	struct funeth_priv *fp = netdev_priv(dev);

	/* If we don't have queues the data path is already down.
	 * Note netif_running(dev) may be true.
	 */
	if (!rcu_access_pointer(fp->rxqs))
		return;

	/* It is also down if the queues aren't on the device. */
	if (fp->txqs[0]->init_state >= FUN_QSTATE_INIT_FULL) {
		netif_info(fp, ifdown, dev,
			   "Tearing down data path on device\n");
		fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0);

		netif_carrier_off(dev);
		netif_tx_disable(dev);

		fun_destroy_rss(fp);
		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
		fun_disable_irqs(dev);
	}

	fun_free_rings(dev, qset);
}

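/* Bring up the data path: create the VI, install the queue set, enable IRQs,
 * set up RSS (or bind the single Rx CQ), and finally enable the port.
 */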
static int fun_up(struct net_device *dev, struct fun_qset *qset)
{
	static const int port_keys[] = {
		FUN_ADMIN_PORT_KEY_STATS_DMA_LOW,
		FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH,
		FUN_ADMIN_PORT_KEY_ENABLE
	};

	struct funeth_priv *fp = netdev_priv(dev);
	u64 vals[] = {
		lower_32_bits(fp->stats_dma_addr),
		upper_32_bits(fp->stats_dma_addr),
		FUN_PORT_FLAG_ENABLE_NOTIFY
	};
	int err;

	netif_info(fp, ifup, dev, "Setting up data path on device\n");

	if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) {
		err = fun_advance_ring_state(dev, qset);
		if (err)
			return err;
	}

	err = fun_vi_create(fp);
	if (err)
		goto free_queues;

	fp->txqs = qset->txqs;
	rcu_assign_pointer(fp->rxqs, qset->rxqs);
	rcu_assign_pointer(fp->xdpqs, qset->xdpqs);

	err = fun_enable_irqs(dev);
	if (err)
		goto destroy_vi;

	if (fp->rss_cfg) {
		err = fun_config_rss(dev, fp->hash_algo, fp->rss_key,
				     fp->indir_table, FUN_ADMIN_SUBOP_CREATE);
	} else {
		/* The non-RSS case has only 1 queue. */
		err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port,
			       FUN_ADMIN_BIND_TYPE_EPCQ,
			       qset->rxqs[0]->hw_cqid);
	}
	if (err)
		goto disable_irqs;

	err = fun_port_write_cmds(fp, 3, port_keys, vals);
	if (err)
		goto free_rss;

	netif_tx_start_all_queues(dev);
	return 0;

free_rss:
	fun_destroy_rss(fp);
disable_irqs:
	fun_disable_irqs(dev);
destroy_vi:
	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
free_queues:
	fun_free_rings(dev, qset);
	return err;
}

static int funeth_open(struct net_device *netdev)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	struct fun_qset qset = {
		.nrxqs = netdev->real_num_rx_queues,
		.ntxqs = netdev->real_num_tx_queues,
		.nxdpqs = fp->num_xdpqs,
		.cq_depth = fp->cq_depth,
		.rq_depth = fp->rq_depth,
		.sq_depth = fp->sq_depth,
		.state = FUN_QSTATE_INIT_FULL,
	};
	int rc;

	rc = fun_alloc_rings(netdev, &qset);
	if (rc)
		return rc;

	rc = fun_up(netdev, &qset);
	if (rc) {
		qset.state = FUN_QSTATE_DESTROYED;
		fun_free_rings(netdev, &qset);
	}

	return rc;
}

static int funeth_close(struct net_device *netdev)
{
	struct fun_qset qset = { .state = FUN_QSTATE_DESTROYED };

	fun_down(netdev, &qset);
	return 0;
}

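/* Aggregate the SW counters of the live queues with the totals accumulated
 * from queues that have since been destroyed.
 */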
static void fun_get_stats64(struct net_device *netdev,
			    struct rtnl_link_stats64 *stats)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	struct funeth_txq **xdpqs;
	struct funeth_rxq **rxqs;
	unsigned int i, start;

	stats->tx_packets = fp->tx_packets;
	stats->tx_bytes   = fp->tx_bytes;
	stats->tx_dropped = fp->tx_dropped;

	stats->rx_packets = fp->rx_packets;
	stats->rx_bytes   = fp->rx_bytes;
	stats->rx_dropped = fp->rx_dropped;

	rcu_read_lock();
	rxqs = rcu_dereference(fp->rxqs);
	if (!rxqs)
		goto unlock;

	for (i = 0; i < netdev->real_num_tx_queues; i++) {
		struct funeth_txq_stats txs;

		FUN_QSTAT_READ(fp->txqs[i], start, txs);
		stats->tx_packets += txs.tx_pkts;
		stats->tx_bytes   += txs.tx_bytes;
		stats->tx_dropped += txs.tx_map_err;
	}

	for (i = 0; i < netdev->real_num_rx_queues; i++) {
		struct funeth_rxq_stats rxs;

		FUN_QSTAT_READ(rxqs[i], start, rxs);
		stats->rx_packets += rxs.rx_pkts;
		stats->rx_bytes   += rxs.rx_bytes;
		stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops;
	}

	xdpqs = rcu_dereference(fp->xdpqs);
	if (!xdpqs)
		goto unlock;

	for (i = 0; i < fp->num_xdpqs; i++) {
		struct funeth_txq_stats txs;

		FUN_QSTAT_READ(xdpqs[i], start, txs);
		stats->tx_packets += txs.tx_pkts;
		stats->tx_bytes   += txs.tx_bytes;
	}
unlock:
	rcu_read_unlock();
}

static int fun_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	int rc;

	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu);
	if (!rc)
		WRITE_ONCE(netdev->mtu, new_mtu);
	return rc;
}

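/* Set the port's MAC address, validating it and informing the device first. */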
static int fun_set_macaddr(struct net_device *netdev, void *addr)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	struct sockaddr *saddr = addr;
	int rc;

	if (!is_valid_ether_addr(saddr->sa_data))
		return -EADDRNOTAVAIL;

	if (ether_addr_equal(netdev->dev_addr, saddr->sa_data))
		return 0;

	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
				ether_addr_to_u64(saddr->sa_data));
	if (!rc)
		eth_hw_addr_set(netdev, saddr->sa_data);
	return rc;
}

static int fun_get_port_attributes(struct net_device *netdev)
{
	static const int keys[] = {
		FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES,
		FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU
	};
	static const int phys_keys[] = {
		FUN_ADMIN_PORT_KEY_LANE_ATTRS,
	};

	struct funeth_priv *fp = netdev_priv(netdev);
	u64 data[ARRAY_SIZE(keys)];
	u8 mac[ETH_ALEN];
	unsigned int i;
	int rc;

	rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data);
	if (rc)
		return rc;

	for (i = 0; i < ARRAY_SIZE(keys); i++) {
		switch (keys[i]) {
		case FUN_ADMIN_PORT_KEY_MACADDR:
			u64_to_ether_addr(data[i], mac);
			if (is_zero_ether_addr(mac)) {
				eth_hw_addr_random(netdev);
			} else if (is_valid_ether_addr(mac)) {
				eth_hw_addr_set(netdev, mac);
			} else {
				netdev_err(netdev,
					   "device provided a bad MAC address %pM\n",
					   mac);
				return -EINVAL;
			}
			break;

		case FUN_ADMIN_PORT_KEY_CAPABILITIES:
			fp->port_caps = data[i];
			break;

		case FUN_ADMIN_PORT_KEY_ADVERT:
			fp->advertising = data[i];
			break;

		case FUN_ADMIN_PORT_KEY_MTU:
			netdev->mtu = data[i];
			break;
		}
	}

	if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) {
		rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys,
					data);
		if (rc)
			return rc;

		fp->lane_attrs = data[0];
	}

	if (netdev->addr_assign_type == NET_ADDR_RANDOM)
		return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
					  ether_addr_to_u64(netdev->dev_addr));
	return 0;
}

static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
{
	const struct funeth_priv *fp = netdev_priv(dev);

	return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg,
			    sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0;
}

static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct hwtstamp_config cfg;

	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
		return -EFAULT;

	/* no TX HW timestamps */
	cfg.tx_type = HWTSTAMP_TX_OFF;

	switch (cfg.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		break;
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
	case HWTSTAMP_FILTER_NTP_ALL:
		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	default:
		return -ERANGE;
	}

	fp->hwtstamp_cfg = cfg;
	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
}

static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCSHWTSTAMP:
		return fun_hwtstamp_set(dev, ifr);
	case SIOCGHWTSTAMP:
		return fun_hwtstamp_get(dev, ifr);
	default:
		return -EOPNOTSUPP;
	}
}

/* Prepare the queues for XDP. */
static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog)
{
	struct funeth_priv *fp = netdev_priv(dev);
	unsigned int i, nqs = num_online_cpus();
	struct funeth_txq **xdpqs;
	struct funeth_rxq **rxqs;
	int err;

	xdpqs = alloc_xdpqs(dev, nqs, fp->sq_depth, 0, FUN_QSTATE_INIT_FULL);
	if (IS_ERR(xdpqs))
		return PTR_ERR(xdpqs);

	rxqs = rtnl_dereference(fp->rxqs);
	for (i = 0; i < dev->real_num_rx_queues; i++) {
		err = fun_rxq_set_bpf(rxqs[i], prog);
		if (err)
			goto out;
	}

	fp->num_xdpqs = nqs;
	rcu_assign_pointer(fp->xdpqs, xdpqs);
	return 0;
out:
	while (i--)
		fun_rxq_set_bpf(rxqs[i], NULL);

	free_xdpqs(xdpqs, nqs, 0, FUN_QSTATE_DESTROYED);
	return err;
}

/* Set the queues for non-XDP operation. */
static void fun_end_xdp(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct funeth_txq **xdpqs;
	struct funeth_rxq **rxqs;
	unsigned int i;

	xdpqs = rtnl_dereference(fp->xdpqs);
	rcu_assign_pointer(fp->xdpqs, NULL);
	synchronize_net();
	/* at this point both Rx and Tx XDP processing has ended */

	free_xdpqs(xdpqs, fp->num_xdpqs, 0, FUN_QSTATE_DESTROYED);
	fp->num_xdpqs = 0;

	rxqs = rtnl_dereference(fp->rxqs);
	for (i = 0; i < dev->real_num_rx_queues; i++)
		fun_rxq_set_bpf(rxqs[i], NULL);
}

#define XDP_MAX_MTU \
	(PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM)

static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct bpf_prog *old_prog, *prog = xdp->prog;
	struct funeth_priv *fp = netdev_priv(dev);
	int i, err;

	/* XDP uses at most one buffer */
	if (prog && dev->mtu > XDP_MAX_MTU) {
		netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu);
		NL_SET_ERR_MSG_MOD(xdp->extack,
				   "Device MTU too large for XDP");
		return -EINVAL;
	}

	if (!netif_running(dev)) {
		fp->num_xdpqs = prog ? num_online_cpus() : 0;
	} else if (prog && !fp->xdp_prog) {
		err = fun_enter_xdp(dev, prog);
		if (err) {
			NL_SET_ERR_MSG_MOD(xdp->extack,
					   "Failed to set queues for XDP.");
			return err;
		}
	} else if (!prog && fp->xdp_prog) {
		fun_end_xdp(dev);
	} else {
		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);

		for (i = 0; i < dev->real_num_rx_queues; i++)
			WRITE_ONCE(rxqs[i]->xdp_prog, prog);
	}

	if (prog)
		xdp_features_set_redirect_target(dev, true);
	else
		xdp_features_clear_redirect_target(dev);

	dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU;
	old_prog = xchg(&fp->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

	return 0;
}

static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return fun_xdp_setup(dev, xdp);
	default:
		return -EINVAL;
	}
}

static int fun_init_vports(struct fun_ethdev *ed, unsigned int n)
{
	if (ed->num_vports)
		return -EINVAL;

	ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL);
	if (!ed->vport_info)
		return -ENOMEM;
	ed->num_vports = n;
	return 0;
}

static void fun_free_vports(struct fun_ethdev *ed)
{
	kvfree(ed->vport_info);
	ed->vport_info = NULL;
	ed->num_vports = 0;
}

static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed,
					    unsigned int vport)
{
	if (!ed->vport_info || vport >= ed->num_vports)
		return NULL;

	return ed->vport_info + vport;
}

static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_adi_param mac_param = {};
	struct fun_dev *fdev = fp->fdev;
	struct fun_ethdev *ed = to_fun_ethdev(fdev);
	struct fun_vport_info *vi;
	int rc = -EINVAL;

	if (is_multicast_ether_addr(mac))
		return -EINVAL;

	mutex_lock(&ed->state_mutex);
	vi = fun_get_vport(ed, vf);
	if (!vi)
		goto unlock;

	mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac));
	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1,
			   &mac_param);
	if (!rc)
		ether_addr_copy(vi->mac, mac);
unlock:
	mutex_unlock(&ed->state_mutex);
	return rc;
}

static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
			   __be16 vlan_proto)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_adi_param vlan_param = {};
	struct fun_dev *fdev = fp->fdev;
	struct fun_ethdev *ed = to_fun_ethdev(fdev);
	struct fun_vport_info *vi;
	int rc = -EINVAL;

	if (vlan > 4095 || qos > 7)
		return -EINVAL;
	if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) &&
	    vlan_proto != htons(ETH_P_8021AD))
		return -EINVAL;

	mutex_lock(&ed->state_mutex);
	vi = fun_get_vport(ed, vf);
	if (!vi)
		goto unlock;

	vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto),
					      ((u16)qos << VLAN_PRIO_SHIFT) | vlan);
	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param);
	if (!rc) {
		vi->vlan = vlan;
		vi->qos = qos;
		vi->vlan_proto = vlan_proto;
	}
unlock:
	mutex_unlock(&ed->state_mutex);
	return rc;
}

static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
			   int max_tx_rate)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_adi_param rate_param = {};
	struct fun_dev *fdev = fp->fdev;
	struct fun_ethdev *ed = to_fun_ethdev(fdev);
	struct fun_vport_info *vi;
	int rc = -EINVAL;

	if (min_tx_rate)
		return -EINVAL;

	mutex_lock(&ed->state_mutex);
	vi = fun_get_vport(ed, vf);
	if (!vi)
		goto unlock;

	rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate);
	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param);
	if (!rc)
		vi->max_rate = max_tx_rate;
unlock:
	mutex_unlock(&ed->state_mutex);
	return rc;
}

static int fun_get_vf_config(struct net_device *dev, int vf,
			     struct ifla_vf_info *ivi)
{
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_ethdev *ed = to_fun_ethdev(fp->fdev);
	const struct fun_vport_info *vi;

	mutex_lock(&ed->state_mutex);
	vi = fun_get_vport(ed, vf);
	if (!vi)
		goto unlock;

	memset(ivi, 0, sizeof(*ivi));
	ivi->vf = vf;
	ether_addr_copy(ivi->mac, vi->mac);
	ivi->vlan = vi->vlan;
	ivi->qos = vi->qos;
	ivi->vlan_proto = vi->vlan_proto;
	ivi->max_tx_rate = vi->max_rate;
	ivi->spoofchk = vi->spoofchk;
unlock:
	mutex_unlock(&ed->state_mutex);
	return vi ? 0 : -EINVAL;
}

static void fun_uninit(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);

	fun_prune_queue_irqs(dev);
	xa_destroy(&fp->irqs);
}

static const struct net_device_ops fun_netdev_ops = {
	.ndo_open		= funeth_open,
	.ndo_stop		= funeth_close,
	.ndo_start_xmit		= fun_start_xmit,
	.ndo_get_stats64	= fun_get_stats64,
	.ndo_change_mtu		= fun_change_mtu,
	.ndo_set_mac_address	= fun_set_macaddr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_eth_ioctl		= fun_ioctl,
	.ndo_uninit		= fun_uninit,
	.ndo_bpf		= fun_xdp,
	.ndo_xdp_xmit		= fun_xdp_xmit_frames,
	.ndo_set_vf_mac		= fun_set_vf_mac,
	.ndo_set_vf_vlan	= fun_set_vf_vlan,
	.ndo_set_vf_rate	= fun_set_vf_rate,
	.ndo_get_vf_config	= fun_get_vf_config,
};

#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \
			 NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \
			 NETIF_F_GSO_UDP_TUNNEL_CSUM)
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
		   NETIF_F_GSO_UDP_L4)
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \
		   GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA)

static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx)
{
	unsigned int i;

	for (i = 0; i < fp->indir_table_nentries; i++)
		fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx);
}

/* Reset the RSS indirection table to equal distribution across the current
 * number of Rx queues. Called at init time and whenever the number of Rx
 * queues changes subsequently. Note that this may also resize the indirection
 * table.
 */
static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx)
{
	struct funeth_priv *fp = netdev_priv(dev);

	if (!fp->rss_cfg)
		return;

	/* Set the table size to the max possible that allows an equal number
	 * of occurrences of each CQ.
	 */
	fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx);
	fun_dflt_rss_indir(fp, nrx);
}

/* Update the RSS LUT to contain only queues in [0, nrx). Normally this will
 * update the LUT to an equal distribution among nrx queues. If @only_if_needed
 * is set the LUT is left unchanged if it already does not reference any queues
 * >= nrx.
 */
static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx,
			    bool only_if_needed)
{
	struct funeth_priv *fp = netdev_priv(dev);
	u32 old_lut[FUN_ETH_RSS_MAX_INDIR_ENT];
	unsigned int i, oldsz;
	int err;

	if (!fp->rss_cfg)
		return 0;

	if (only_if_needed) {
		for (i = 0; i < fp->indir_table_nentries; i++)
			if (fp->indir_table[i] >= nrx)
				break;

		if (i >= fp->indir_table_nentries)
			return 0;
	}

	memcpy(old_lut, fp->indir_table, sizeof(old_lut));
	oldsz = fp->indir_table_nentries;
	fun_reset_rss_indir(dev, nrx);

	err = fun_config_rss(dev, fp->hash_algo, fp->rss_key,
			     fp->indir_table, FUN_ADMIN_SUBOP_MODIFY);
	if (!err)
		return 0;

	memcpy(fp->indir_table, old_lut, sizeof(old_lut));
	fp->indir_table_nentries = oldsz;
	return err;
}

/* Allocate the DMA area for the RSS configuration commands to the device, and
 * initialize the hash, hash key, indirection table size and its entries to
 * their defaults. The indirection table defaults to equal distribution across
 * the Rx queues.
 */
static int fun_init_rss(struct net_device *dev)
{
	struct funeth_priv *fp = netdev_priv(dev);
	size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table);

	fp->rss_hw_id = FUN_HCI_ID_INVALID;
	if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS))
		return 0;

	fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size,
					 &fp->rss_dma_addr, GFP_KERNEL);
	if (!fp->rss_cfg)
		return -ENOMEM;

	fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
	netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key));
	fun_reset_rss_indir(dev, dev->real_num_rx_queues);
	return 0;
}

static void fun_free_rss(struct funeth_priv *fp)
{
	if (fp->rss_cfg) {
		dma_free_coherent(&fp->pdev->dev,
				  sizeof(fp->rss_key) + sizeof(fp->indir_table),
				  fp->rss_cfg, fp->rss_dma_addr);
		fp->rss_cfg = NULL;
	}
}

void fun_set_ring_count(struct net_device *netdev, unsigned int ntx,
			unsigned int nrx)
{
	netif_set_real_num_tx_queues(netdev, ntx);
	if (nrx != netdev->real_num_rx_queues) {
		netif_set_real_num_rx_queues(netdev, nrx);
		fun_reset_rss_indir(netdev, nrx);
	}
}

static int fun_init_stats_area(struct funeth_priv *fp)
{
	unsigned int nstats;

	if (!(fp->port_caps & FUN_PORT_CAP_STATS))
		return 0;

	nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
		 PORT_MAC_FEC_STATS_MAX;

	fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64),
				       &fp->stats_dma_addr, GFP_KERNEL);
	if (!fp->stats)
		return -ENOMEM;
	return 0;
}

static void fun_free_stats_area(struct funeth_priv *fp)
{
	unsigned int nstats;

	if (fp->stats) {
		/* free with the same size used for the allocation */
		nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
			 PORT_MAC_FEC_STATS_MAX;
		dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64),
				  fp->stats, fp->stats_dma_addr);
		fp->stats = NULL;
	}
}

static int fun_dl_port_register(struct net_device *netdev)
{
	struct funeth_priv *fp = netdev_priv(netdev);
	struct devlink *dl = priv_to_devlink(fp->fdev);
	struct devlink_port_attrs attrs = {};
	unsigned int idx;

	if (fp->port_caps & FUN_PORT_CAP_VPORT) {
		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
		idx = fp->lport;
	} else {
		idx = netdev->dev_port;
		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
		attrs.lanes = fp->lane_attrs & 7;
		if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) {
			attrs.splittable = true;
			attrs.phys.port_number = fp->lport & ~3;
			attrs.phys.split_subport_number = fp->lport & 3;
		} else {
			attrs.phys.port_number = fp->lport;
		}
	}

	devlink_port_attrs_set(&fp->dl_port, &attrs);

	return devlink_port_register(dl, &fp->dl_port, idx);
}

/* Determine the max Tx/Rx queues for a port. */
static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx,
		      unsigned int *nrx)
{
	int neth;

	if (ed->num_ports > 1 || is_kdump_kernel()) {
		*ntx = 1;
		*nrx = 1;
		return 0;
	}

	neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH);
	if (neth < 0)
		return neth;

	/* We determine the max number of queues based on the CPU
	 * cores, device interrupts and queues, RSS size, and device Tx flows.
	 *
	 * - At least 1 Rx and 1 Tx queues.
	 * - At most 1 Rx/Tx queue per core.
	 * - Each Rx/Tx queue needs 1 SQ.
	 */
	*ntx = min(ed->nsqs_per_port - 1, num_online_cpus());
	if (*ntx > neth)
		*ntx = neth;
	*nrx = *ntx;
	if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT)
		*nrx = FUN_ETH_RSS_MAX_INDIR_ENT;
	return 0;
}

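/* Set the initial number of Tx/Rx queues, bounded by the driver default and
 * the number of SQs available to the port.
 */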
static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs)
{
	unsigned int ntx, nrx;

	ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES);
	nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES);
	if (ntx <= nrx) {
		ntx = min(ntx, nsqs / 2);
		nrx = min(nrx, nsqs - ntx);
	} else {
		nrx = min(nrx, nsqs / 2);
		ntx = min(ntx, nsqs - nrx);
	}

	netif_set_real_num_tx_queues(dev, ntx);
	netif_set_real_num_rx_queues(dev, nrx);
}

/* Replace the existing Rx/Tx/XDP queues with equal number of queues with
 * different settings, e.g. depth. This is a disruptive replacement that
 * temporarily shuts down the data path and should be limited to changes that
 * can't be applied to live queues. The old queues are always discarded.
 */
int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs,
		       struct netlink_ext_ack *extack)
{
	struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED };
	struct funeth_priv *fp = netdev_priv(dev);
	int err;

	newqs->nrxqs = dev->real_num_rx_queues;
	newqs->ntxqs = dev->real_num_tx_queues;
	newqs->nxdpqs = fp->num_xdpqs;
	newqs->state = FUN_QSTATE_INIT_SW;
	err = fun_alloc_rings(dev, newqs);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Unable to allocate memory for new queues, keeping current settings");
		return err;
	}

	fun_down(dev, &oldqs);

	err = fun_up(dev, newqs);
	if (!err)
		return 0;

	/* The new queues couldn't be installed. We do not retry the old queues
	 * as they are the same to the device as the new queues and would
	 * similarly fail.
	 */
	newqs->state = FUN_QSTATE_DESTROYED;
	fun_free_rings(dev, newqs);
	NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues.");
	return err;
}

/* Change the number of Rx/Tx queues of a device while it is up. This is done
 * by incrementally adding/removing queues to meet the new requirements while
 * handling ongoing traffic.
 */
int fun_change_num_queues(struct net_device *dev, unsigned int ntx,
			  unsigned int nrx)
{
	unsigned int keep_tx = min(dev->real_num_tx_queues, ntx);
	unsigned int keep_rx = min(dev->real_num_rx_queues, nrx);
	struct funeth_priv *fp = netdev_priv(dev);
	struct fun_qset oldqs = {
		.rxqs = rtnl_dereference(fp->rxqs),
		.txqs = fp->txqs,
		.nrxqs = dev->real_num_rx_queues,
		.ntxqs = dev->real_num_tx_queues,
		.rxq_start = keep_rx,
		.txq_start = keep_tx,
		.state = FUN_QSTATE_DESTROYED
	};
	struct fun_qset newqs = {
		.nrxqs = nrx,
		.ntxqs = ntx,
		.rxq_start = keep_rx,
		.txq_start = keep_tx,
		.cq_depth = fp->cq_depth,
		.rq_depth = fp->rq_depth,
		.sq_depth = fp->sq_depth,
		.state = FUN_QSTATE_INIT_FULL
	};
	int i, err;

	err = fun_alloc_rings(dev, &newqs);
	if (err)
		return err;

	err = fun_enable_irqs(dev); /* of any newly added queues */
	if (err)
		goto free_irqs;

	/* copy the queues we are keeping to the new set */
	memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs));
	memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs));

	if (nrx < dev->real_num_rx_queues) {
		err = fun_rss_set_qnum(dev, nrx, true);
		if (err)
			goto disable_tx_irqs;

		for (i = nrx; i < dev->real_num_rx_queues; i++)
			fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi,
							 struct fun_irq, napi));

		netif_set_real_num_rx_queues(dev, nrx);
	}

	if (ntx < dev->real_num_tx_queues)
		netif_set_real_num_tx_queues(dev, ntx);

	rcu_assign_pointer(fp->rxqs, newqs.rxqs);
	fp->txqs = newqs.txqs;
	synchronize_net();

	if (ntx > dev->real_num_tx_queues)
		netif_set_real_num_tx_queues(dev, ntx);

	if (nrx > dev->real_num_rx_queues) {
		netif_set_real_num_rx_queues(dev, nrx);
		fun_rss_set_qnum(dev, nrx, false);
	}

	/* disable interrupts of any excess Tx queues */
	for (i = keep_tx; i < oldqs.ntxqs; i++)
		fun_disable_one_irq(oldqs.txqs[i]->irq);

	fun_free_rings(dev, &oldqs);
	fun_prune_queue_irqs(dev);
	return 0;

disable_tx_irqs:
	for (i = oldqs.ntxqs; i < ntx; i++)
		fun_disable_one_irq(newqs.txqs[i]->irq);
free_irqs:
	newqs.state = FUN_QSTATE_DESTROYED;
	fun_free_rings(dev, &newqs);
	fun_prune_queue_irqs(dev);
	return err;
}

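/* Allocate, initialize, and register the netdev for one device port, creating
 * the port, its RSS and stats DMA areas, and its devlink port along the way.
 */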
static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid)
{
	struct fun_dev *fdev = &ed->fdev;
	struct net_device *netdev;
	struct funeth_priv *fp;
	unsigned int ntx, nrx;
	int rc;

	rc = fun_max_qs(ed, &ntx, &nrx);
	if (rc)
		return rc;

	netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx);
	if (!netdev) {
		rc = -ENOMEM;
		goto done;
	}

	netdev->dev_port = portid;
	fun_queue_defaults(netdev, ed->nsqs_per_port);

	fp = netdev_priv(netdev);
	fp->fdev = fdev;
	fp->pdev = to_pci_dev(fdev->dev);
	fp->netdev = netdev;
	xa_init(&fp->irqs);
	fp->rx_irq_ofst = ntx;
	seqcount_init(&fp->link_seq);

	fp->lport = INVALID_LPORT;
	rc = fun_port_create(netdev);
	if (rc)
		goto free_netdev;

	/* bind port to admin CQ for async events */
	rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid,
		      FUN_ADMIN_BIND_TYPE_EPCQ, 0);
	if (rc)
		goto destroy_port;

	rc = fun_get_port_attributes(netdev);
	if (rc)
		goto destroy_port;

	rc = fun_init_rss(netdev);
	if (rc)
		goto destroy_port;

	rc = fun_init_stats_area(fp);
	if (rc)
		goto free_rss;

	SET_NETDEV_DEV(netdev, fdev->dev);
	SET_NETDEV_DEVLINK_PORT(netdev, &fp->dl_port);
	netdev->netdev_ops = &fun_netdev_ops;

	netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM;
	if (fp->port_caps & FUN_PORT_CAP_OFFLOADS)
		netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS;
	if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS)
		netdev->hw_features |= GSO_ENCAP_FLAGS;

	netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA;
	netdev->vlan_features = netdev->features & VLAN_FEAT;
	netdev->mpls_features = netdev->vlan_features;
	netdev->hw_enc_features = netdev->hw_features;
	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;

	netdev->min_mtu = ETH_MIN_MTU;
	netdev->max_mtu = FUN_MAX_MTU;

	fun_set_ethtool_ops(netdev);

	/* configurable parameters */
	fp->sq_depth = min(SQ_DEPTH, fdev->q_depth);
	fp->cq_depth = min(CQ_DEPTH, fdev->q_depth);
	fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth);
	fp->rx_coal_usec = CQ_INTCOAL_USEC;
	fp->rx_coal_count = CQ_INTCOAL_NPKT;
	fp->tx_coal_usec = SQ_INTCOAL_USEC;
	fp->tx_coal_count = SQ_INTCOAL_NPKT;
	fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);

	rc = fun_dl_port_register(netdev);
	if (rc)
		goto free_stats;

	fp->ktls_id = FUN_HCI_ID_INVALID;
	fun_ktls_init(netdev);			/* optional, failure OK */

	netif_carrier_off(netdev);
	ed->netdevs[portid] = netdev;
	rc = register_netdev(netdev);
	if (rc)
		goto unregister_devlink;
	return 0;

unregister_devlink:
	ed->netdevs[portid] = NULL;
	fun_ktls_cleanup(fp);
	devlink_port_unregister(&fp->dl_port);
free_stats:
	fun_free_stats_area(fp);
free_rss:
	fun_free_rss(fp);
destroy_port:
	fun_port_destroy(netdev);
free_netdev:
	free_netdev(netdev);
done:
	dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc);
	return rc;
}

static void fun_destroy_netdev(struct net_device *netdev)
{
	struct funeth_priv *fp;

	fp = netdev_priv(netdev);
	unregister_netdev(netdev);
	devlink_port_unregister(&fp->dl_port);
	fun_ktls_cleanup(fp);
	fun_free_stats_area(fp);
	fun_free_rss(fp);
	fun_port_destroy(netdev);
	free_netdev(netdev);
}

static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports)
{
	struct fun_dev *fd = &ed->fdev;
	int i, rc;

	/* The admin queue takes 1 IRQ and 2 SQs. */
	ed->nsqs_per_port = min(fd->num_irqs - 1,
				fd->kern_end_qid - 2) / nports;
	if (ed->nsqs_per_port < 2) {
		dev_err(fd->dev, "Too few SQs for %u ports", nports);
		return -EINVAL;
	}

	ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL);
	if (!ed->netdevs)
		return -ENOMEM;

	ed->num_ports = nports;
	for (i = 0; i < nports; i++) {
		rc = fun_create_netdev(ed, i);
		if (rc)
			goto free_netdevs;
	}

	return 0;

free_netdevs:
	while (i)
		fun_destroy_netdev(ed->netdevs[--i]);
	kfree(ed->netdevs);
	ed->netdevs = NULL;
	ed->num_ports = 0;
	return rc;
}

static void fun_destroy_ports(struct fun_ethdev *ed)
{
	unsigned int i;

	for (i = 0; i < ed->num_ports; i++)
		fun_destroy_netdev(ed->netdevs[i]);

	kfree(ed->netdevs);
	ed->netdevs = NULL;
	ed->num_ports = 0;
}

static void fun_update_link_state(const struct fun_ethdev *ed,
				  const struct fun_admin_port_notif *notif)
{
	unsigned int port_idx = be16_to_cpu(notif->id);
	struct net_device *netdev;
	struct funeth_priv *fp;

	if (port_idx >= ed->num_ports)
		return;

	netdev = ed->netdevs[port_idx];
	fp = netdev_priv(netdev);

	write_seqcount_begin(&fp->link_seq);
	fp->link_speed = be32_to_cpu(notif->speed) * 10;  /* 10 Mbps->Mbps */
	fp->active_fc = notif->flow_ctrl;
	fp->active_fec = notif->fec;
	fp->xcvr_type = notif->xcvr_type;
	fp->link_down_reason = notif->link_down_reason;
	fp->lp_advertising = be64_to_cpu(notif->lp_advertising);

	if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN)
		netif_carrier_off(netdev);
	if (notif->link_state & FUN_PORT_FLAG_MAC_UP)
		netif_carrier_on(netdev);

	write_seqcount_end(&fp->link_seq);
	fun_report_link(netdev);
}

/* handler for async events delivered through the admin CQ */
static void fun_event_cb(struct fun_dev *fdev, void *entry)
{
	u8 op = ((struct fun_admin_rsp_common *)entry)->op;

	if (op == FUN_ADMIN_OP_PORT) {
		const struct fun_admin_port_notif *rsp = entry;

		if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) {
			fun_update_link_state(to_fun_ethdev(fdev), rsp);
		} else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) {
			const struct fun_admin_res_count_rsp *r = entry;

			if (r->count.data)
				set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags);
			else
				set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags);
			fun_serv_sched(fdev);
		} else {
			dev_info(fdev->dev, "adminq event unexpected op %u subop %u",
				 op, rsp->subop);
		}
	} else {
		dev_info(fdev->dev, "adminq event unexpected op %u", op);
	}
}

/* handler for pending work managed by the service task */
static void fun_service_cb(struct fun_dev *fdev)
{
	struct fun_ethdev *ed = to_fun_ethdev(fdev);
	int rc;

	if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags))
		fun_destroy_ports(ed);

	if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags))
		return;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
	if (rc < 0 || rc == ed->num_ports)
		return;

	if (ed->num_ports)
		fun_destroy_ports(ed);
	if (rc)
		fun_create_ports(ed, rc);
}

static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs)
{
	struct fun_dev *fdev = pci_get_drvdata(pdev);
	struct fun_ethdev *ed = to_fun_ethdev(fdev);
	int rc;

	if (nvfs == 0) {
		if (pci_vfs_assigned(pdev)) {
			dev_warn(&pdev->dev,
				 "Cannot disable SR-IOV while VFs are assigned\n");
			return -EPERM;
		}

		mutex_lock(&ed->state_mutex);
		fun_free_vports(ed);
		mutex_unlock(&ed->state_mutex);
		pci_disable_sriov(pdev);
		return 0;
	}

	rc = pci_enable_sriov(pdev, nvfs);
	if (rc)
		return rc;

	mutex_lock(&ed->state_mutex);
	rc = fun_init_vports(ed, nvfs);
	mutex_unlock(&ed->state_mutex);
	if (rc) {
		pci_disable_sriov(pdev);
		return rc;
	}

	return nvfs;
}

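/* Device probe: set up the admin queue, discover the number of ports, and
 * create a netdev for each.
 */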
static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct fun_dev_params aqreq = {
		.cqe_size_log2 = ilog2(ADMIN_CQE_SIZE),
		.sqe_size_log2 = ilog2(ADMIN_SQE_SIZE),
		.cq_depth      = ADMIN_CQ_DEPTH,
		.sq_depth      = ADMIN_SQ_DEPTH,
		.rq_depth      = ADMIN_RQ_DEPTH,
		.min_msix      = 2,              /* 1 Rx + 1 Tx */
		.event_cb      = fun_event_cb,
		.serv_cb       = fun_service_cb,
	};
	struct devlink *devlink;
	struct fun_ethdev *ed;
	struct fun_dev *fdev;
	int rc;

	devlink = fun_devlink_alloc(&pdev->dev);
	if (!devlink) {
		dev_err(&pdev->dev, "devlink alloc failed\n");
		return -ENOMEM;
	}

	ed = devlink_priv(devlink);
	mutex_init(&ed->state_mutex);

	fdev = &ed->fdev;
	rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME);
	if (rc)
		goto free_devlink;

	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
	if (rc > 0)
		rc = fun_create_ports(ed, rc);
	if (rc < 0)
		goto disable_dev;

	fun_serv_restart(fdev);
	fun_devlink_register(devlink);
	return 0;

disable_dev:
	fun_dev_disable(fdev);
free_devlink:
	mutex_destroy(&ed->state_mutex);
	fun_devlink_free(devlink);
	return rc;
}

static void funeth_remove(struct pci_dev *pdev)
{
	struct fun_dev *fdev = pci_get_drvdata(pdev);
	struct devlink *devlink;
	struct fun_ethdev *ed;

	ed = to_fun_ethdev(fdev);
	devlink = priv_to_devlink(ed);
	fun_devlink_unregister(devlink);

#ifdef CONFIG_PCI_IOV
	funeth_sriov_configure(pdev, 0);
#endif

	fun_serv_stop(fdev);
	fun_destroy_ports(ed);
	fun_dev_disable(fdev);
	mutex_destroy(&ed->state_mutex);

	fun_devlink_free(devlink);
}

static struct pci_driver funeth_driver = {
	.name		 = KBUILD_MODNAME,
	.id_table	 = funeth_id_table,
	.probe		 = funeth_probe,
	.remove		 = funeth_remove,
	.shutdown	 = funeth_remove,
	.sriov_configure = funeth_sriov_configure,
};

module_pci_driver(funeth_driver);

MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
MODULE_DESCRIPTION("Fungible Ethernet Network Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEVICE_TABLE(pci, funeth_id_table);