// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <linux/module.h>
#include <net/addrconf.h>
#include <rdma/erdma-abi.h>

#include "erdma.h"
#include "erdma_cm.h"
#include "erdma_verbs.h"

MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
MODULE_LICENSE("Dual BSD/GPL");
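
/*
 * Netdevice notifier: mirror state changes of the paired net_device into
 * RDMA port events (link up/down) and keep the device MTU in sync.
 */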
static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
                              void *arg)
{
        struct net_device *netdev = netdev_notifier_info_to_dev(arg);
        struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);

        if (dev->netdev == NULL || dev->netdev != netdev)
                goto done;

        switch (event) {
        case NETDEV_UP:
                dev->state = IB_PORT_ACTIVE;
                erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
                break;
        case NETDEV_DOWN:
                dev->state = IB_PORT_DOWN;
                erdma_port_event(dev, IB_EVENT_PORT_ERR);
                break;
        case NETDEV_CHANGEMTU:
                if (dev->mtu != netdev->mtu) {
                        erdma_set_mtu(dev, netdev->mtu);
                        dev->mtu = netdev->mtu;
                }
                break;
        case NETDEV_REGISTER:
        case NETDEV_UNREGISTER:
        case NETDEV_CHANGEADDR:
        case NETDEV_GOING_DOWN:
        case NETDEV_CHANGE:
        default:
                break;
        }

done:
        return NOTIFY_OK;
}
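
/*
 * Find the net_device paired with this adapter by matching its permanent
 * MAC address against the peer address read from the device registers,
 * then bind it to the ibdev. Returns -EPROBE_DEFER if no match is found
 * yet.
 */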
static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
{
        struct net_device *netdev;
        int ret = -EPROBE_DEFER;

        /* Already bound to a net_device, so we skip. */
        if (dev->netdev)
                return 0;

        rtnl_lock();
        for_each_netdev(&init_net, netdev) {
                /*
                 * In erdma, the paired netdev and ibdev should have the same
                 * MAC address. erdma can get the value from its PCIe bar
                 * registers. Since erdma can not get the paired netdev
                 * reference directly, we do a traverse here to get the paired
                 * netdev.
                 */
                if (ether_addr_equal_unaligned(netdev->perm_addr,
                                               dev->attrs.peer_addr)) {
                        ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
                        if (ret) {
                                rtnl_unlock();
                                ibdev_warn(&dev->ibdev,
                                           "failed (%d) to link netdev", ret);
                                return ret;
                        }

                        dev->netdev = netdev;
                        break;
                }
        }

        rtnl_unlock();

        return ret;
}
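
/*
 * Bind the paired netdev, derive the node GUID from its MAC address,
 * register the ib_device with the RDMA core, and install the netdevice
 * notifier. Registration is rolled back if the notifier cannot be
 * installed.
 */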
static int erdma_device_register(struct erdma_dev *dev)
{
        struct ib_device *ibdev = &dev->ibdev;
        int ret;

        ret = erdma_enum_and_get_netdev(dev);
        if (ret)
                return ret;

        dev->mtu = dev->netdev->mtu;
        addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);

        ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
        if (ret) {
                dev_err(&dev->pdev->dev,
                        "ib_register_device failed: ret = %d\n", ret);
                return ret;
        }

        dev->netdev_nb.notifier_call = erdma_netdev_event;
        ret = register_netdevice_notifier(&dev->netdev_nb);
        if (ret) {
                ibdev_err(&dev->ibdev, "failed to register notifier.\n");
                ib_unregister_device(ibdev);
        }

        return ret;
}
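
/* Common IRQ: dispatches both cmdq completions and async (AEQ) events. */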
static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
{
        struct erdma_dev *dev = data;

        erdma_cmdq_completion_handler(&dev->cmdq);
        erdma_aeq_event_handler(dev);

        return IRQ_HANDLED;
}
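
/*
 * Ask for one MSI-X vector per possible CPU plus one for the command
 * queue, capped at ERDMA_NUM_MSIX_VEC; the number actually granted is
 * stored in dev->attrs.irq_num.
 */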
static int erdma_request_vectors(struct erdma_dev *dev)
{
        int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
        int ret;

        ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
        if (ret < 0) {
                dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
                        ret);
                return ret;
        }
        dev->attrs.irq_num = ret;

        return 0;
}
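
/*
 * Set up the common interrupt: name it after the PCI device, hint its
 * affinity to a CPU local to the PCI bus, and request the handler.
 */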
static int erdma_comm_irq_init(struct erdma_dev *dev)
{
        snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
                 pci_name(dev->pdev));
        dev->comm_irq.msix_vector =
                pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);

        cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
                        &dev->comm_irq.affinity_hint_mask);
        irq_set_affinity_hint(dev->comm_irq.msix_vector,
                              &dev->comm_irq.affinity_hint_mask);

        return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
                           dev->comm_irq.name, dev);
}

static void erdma_comm_irq_uninit(struct erdma_dev *dev)
{
        irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
        free_irq(dev->comm_irq.msix_vector, dev);
}
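
/*
 * Create the DMA pools for hardware response and doorbell buffers, and
 * configure the DMA mask and maximum segment size.
 */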
static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
        int ret;

        dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
                                         ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
                                         0);
        if (!dev->resp_pool)
                return -ENOMEM;

        dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev,
                                       ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);
        if (!dev->db_pool) {
                ret = -ENOMEM;
                goto destroy_resp_pool;
        }

        ret = dma_set_mask_and_coherent(&pdev->dev,
                                        DMA_BIT_MASK(ERDMA_PCI_WIDTH));
        if (ret)
                goto destroy_db_pool;

        dma_set_max_seg_size(&pdev->dev, UINT_MAX);

        return 0;

destroy_db_pool:
        dma_pool_destroy(dev->db_pool);

destroy_resp_pool:
        dma_pool_destroy(dev->resp_pool);

        return ret;
}

static void erdma_device_uninit(struct erdma_dev *dev)
{
        dma_pool_destroy(dev->db_pool);
        dma_pool_destroy(dev->resp_pool);
}

static void erdma_hw_reset(struct erdma_dev *dev)
{
        u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);

        erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
}
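
/*
 * Trigger hardware initialization, then poll the status register until
 * the INIT_DONE bit is set or the retry budget runs out.
 */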
static int erdma_wait_hw_init_done(struct erdma_dev *dev)
{
        int i;

        erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
                          FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));

        for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
                if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
                                           ERDMA_REG_DEV_ST_INIT_DONE_MASK))
                        break;

                msleep(ERDMA_REG_ACCESS_WAIT_MS);
        }

        if (i == ERDMA_WAIT_DEV_DONE_CNT) {
                dev_err(&dev->pdev->dev, "wait init done failed.\n");
                return -ETIMEDOUT;
        }

        return 0;
}

static const struct pci_device_id erdma_pci_tbl[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
        {}
};
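
/*
 * First-stage probe: enable the PCI function, map the function BAR,
 * check the hardware version, then bring up DMA pools, MSI-X vectors,
 * the common IRQ, the AEQ, the command queue and the completion EQs,
 * unwinding in reverse order on failure.
 */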
static int erdma_probe_dev(struct pci_dev *pdev)
{
        struct erdma_dev *dev;
        int bars, err;
        u32 version;

        err = pci_enable_device(pdev);
        if (err) {
                dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
                return err;
        }

        pci_set_master(pdev);

        dev = ib_alloc_device(erdma_dev, ibdev);
        if (!dev) {
                dev_err(&pdev->dev, "ib_alloc_device failed\n");
                err = -ENOMEM;
                goto err_disable_device;
        }

        pci_set_drvdata(pdev, dev);
        dev->pdev = pdev;
        dev->attrs.numa_node = dev_to_node(&pdev->dev);

        bars = pci_select_bars(pdev, IORESOURCE_MEM);
        err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
        if (bars != ERDMA_BAR_MASK || err) {
                err = err ? err : -EINVAL;
                goto err_ib_device_release;
        }

        dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
        dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);

        dev->func_bar =
                devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
        if (!dev->func_bar) {
                dev_err(&pdev->dev, "devm_ioremap failed.\n");
                err = -EFAULT;
                goto err_release_bars;
        }

        version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
        if (version == 0) {
                /* A version of zero means a non-functional function. */
                err = -ENODEV;
                goto err_iounmap_func_bar;
        }

        err = erdma_device_init(dev, pdev);
        if (err)
                goto err_iounmap_func_bar;

        err = erdma_request_vectors(dev);
        if (err)
                goto err_uninit_device;

        err = erdma_comm_irq_init(dev);
        if (err)
                goto err_free_vectors;

        err = erdma_aeq_init(dev);
        if (err)
                goto err_uninit_comm_irq;

        err = erdma_cmdq_init(dev);
        if (err)
                goto err_uninit_aeq;

        err = erdma_wait_hw_init_done(dev);
        if (err)
                goto err_uninit_cmdq;

        err = erdma_ceqs_init(dev);
        if (err)
                goto err_reset_hw;

        erdma_finish_cmdq_init(dev);

        return 0;

err_reset_hw:
        erdma_hw_reset(dev);

err_uninit_cmdq:
        erdma_cmdq_destroy(dev);

err_uninit_aeq:
        erdma_eq_destroy(dev, &dev->aeq);

err_uninit_comm_irq:
        erdma_comm_irq_uninit(dev);

err_free_vectors:
        pci_free_irq_vectors(dev->pdev);

err_uninit_device:
        erdma_device_uninit(dev);

err_iounmap_func_bar:
        devm_iounmap(&pdev->dev, dev->func_bar);

err_release_bars:
        pci_release_selected_regions(pdev, bars);

err_ib_device_release:
        ib_dealloc_device(&dev->ibdev);

err_disable_device:
        pci_disable_device(pdev);

        return err;
}
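
/* Tear down everything erdma_probe_dev() set up, in reverse order. */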
static void erdma_remove_dev(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);

        erdma_ceqs_uninit(dev);
        erdma_hw_reset(dev);
        erdma_cmdq_destroy(dev);
        erdma_eq_destroy(dev, &dev->aeq);
        erdma_comm_irq_uninit(dev);
        pci_free_irq_vectors(dev->pdev);
        erdma_device_uninit(dev);

        devm_iounmap(&pdev->dev, dev->func_bar);
        pci_release_selected_regions(pdev, ERDMA_BAR_MASK);

        ib_dealloc_device(&dev->ibdev);

        pci_disable_device(pdev);
}

#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
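
/*
 * Query device capabilities and firmware information through the command
 * queue and cache the results in dev->attrs.
 */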
static int erdma_dev_attrs_init(struct erdma_dev *dev)
{
        int err;
        u64 req_hdr, cap0, cap1;

        erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
                                CMDQ_OPCODE_QUERY_DEVICE);

        err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
                                  &cap1);
        if (err)
                return err;

        dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
        dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
        dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
        dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
        dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
        dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
        dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
        dev->attrs.max_mr = dev->attrs.max_qp << 1;
        dev->attrs.max_cq = dev->attrs.max_qp << 1;
        dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);

        dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
        dev->attrs.max_ord = ERDMA_MAX_ORD;
        dev->attrs.max_ird = ERDMA_MAX_IRD;
        dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
        dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
        dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
        dev->attrs.max_pd = ERDMA_MAX_PD;

        dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
        dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;

        erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
                                CMDQ_OPCODE_QUERY_FW_INFO);

        err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
                                  &cap1);
        if (!err)
                dev->attrs.fw_version =
                        FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);

        return err;
}
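
/*
 * If the device advertises the extended-doorbell capability, report the
 * host PAGE_SHIFT to the firmware (PS_EN presumably enables page-size
 * handling on the device side).
 */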
static int erdma_device_config(struct erdma_dev *dev)
{
        struct erdma_cmdq_config_device_req req = {};

        if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
                return 0;

        erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
                                CMDQ_OPCODE_CONF_DEVICE);

        req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
                  FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);

        return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}
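
/*
 * Allocate one bitmap allocator per software-managed resource type
 * (PDs, STag indexes), freeing any partially allocated bitmaps on
 * failure.
 */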
static int erdma_res_cb_init(struct erdma_dev *dev)
{
        int i, j;

        for (i = 0; i < ERDMA_RES_CNT; i++) {
                dev->res_cb[i].next_alloc_idx = 1;
                spin_lock_init(&dev->res_cb[i].lock);
                dev->res_cb[i].bitmap =
                        bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
                if (!dev->res_cb[i].bitmap)
                        goto err;
        }

        return 0;

err:
        for (j = 0; j < i; j++)
                bitmap_free(dev->res_cb[j].bitmap);

        return -ENOMEM;
}

static void erdma_res_cb_free(struct erdma_dev *dev)
{
        int i;

        for (i = 0; i < ERDMA_RES_CNT; i++)
                bitmap_free(dev->res_cb[i].bitmap);
}
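
/* Verb entry points and iWARP CM callbacks exported to the RDMA core. */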
static const struct ib_device_ops erdma_device_ops = {
        .owner = THIS_MODULE,
        .driver_id = RDMA_DRIVER_ERDMA,
        .uverbs_abi_ver = ERDMA_ABI_VERSION,

        .alloc_hw_port_stats = erdma_alloc_hw_port_stats,
        .alloc_mr = erdma_ib_alloc_mr,
        .alloc_pd = erdma_alloc_pd,
        .alloc_ucontext = erdma_alloc_ucontext,
        .create_cq = erdma_create_cq,
        .create_qp = erdma_create_qp,
        .dealloc_pd = erdma_dealloc_pd,
        .dealloc_ucontext = erdma_dealloc_ucontext,
        .dereg_mr = erdma_dereg_mr,
        .destroy_cq = erdma_destroy_cq,
        .destroy_qp = erdma_destroy_qp,
        .disassociate_ucontext = erdma_disassociate_ucontext,
        .get_dma_mr = erdma_get_dma_mr,
        .get_hw_stats = erdma_get_hw_stats,
        .get_port_immutable = erdma_get_port_immutable,
        .iw_accept = erdma_accept,
        .iw_add_ref = erdma_qp_get_ref,
        .iw_connect = erdma_connect,
        .iw_create_listen = erdma_create_listen,
        .iw_destroy_listen = erdma_destroy_listen,
        .iw_get_qp = erdma_get_ibqp,
        .iw_reject = erdma_reject,
        .iw_rem_ref = erdma_qp_put_ref,
        .map_mr_sg = erdma_map_mr_sg,
        .mmap = erdma_mmap,
        .mmap_free = erdma_mmap_free,
        .modify_qp = erdma_modify_qp,
        .post_recv = erdma_post_recv,
        .post_send = erdma_post_send,
        .poll_cq = erdma_poll_cq,
        .query_device = erdma_query_device,
        .query_gid = erdma_query_gid,
        .query_port = erdma_query_port,
        .query_qp = erdma_query_qp,
        .req_notify_cq = erdma_req_notify_cq,
        .reg_user_mr = erdma_reg_user_mr,

        INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
        INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
        INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
        INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
};
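
/*
 * Second-stage probe: query device attributes, configure the ib_device,
 * set up resource allocators and the reflush workqueue, then register
 * with the RDMA core.
 */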
static int erdma_ib_device_add(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);
        struct ib_device *ibdev = &dev->ibdev;
        u64 mac;
        int ret;

        ret = erdma_dev_attrs_init(dev);
        if (ret)
                return ret;

        ret = erdma_device_config(dev);
        if (ret)
                return ret;

        ibdev->node_type = RDMA_NODE_RNIC;
        memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));

        /*
         * Current model (one-to-one device association):
         * One ERDMA device per net_device or, equivalently,
         * per physical port.
         */
        ibdev->phys_port_cnt = 1;
        ibdev->num_comp_vectors = dev->attrs.irq_num - 1;

        ib_set_device_ops(ibdev, &erdma_device_ops);

        INIT_LIST_HEAD(&dev->cep_list);

        spin_lock_init(&dev->lock);
        xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
        xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
        dev->next_alloc_cqn = 1;
        dev->next_alloc_qpn = 1;

        ret = erdma_res_cb_init(dev);
        if (ret)
                return ret;

        atomic_set(&dev->num_ctx, 0);

        mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
        mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;

        u64_to_ether_addr(mac, dev->attrs.peer_addr);

        dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
                                          WQ_UNBOUND_MAX_ACTIVE);
        if (!dev->reflush_wq) {
                ret = -ENOMEM;
                goto err_alloc_workqueue;
        }

        ret = erdma_device_register(dev);
        if (ret)
                goto err_register;

        return 0;

err_register:
        destroy_workqueue(dev->reflush_wq);
err_alloc_workqueue:
        xa_destroy(&dev->qp_xa);
        xa_destroy(&dev->cq_xa);

        erdma_res_cb_free(dev);

        return ret;
}

static void erdma_ib_device_remove(struct pci_dev *pdev)
{
        struct erdma_dev *dev = pci_get_drvdata(pdev);

        unregister_netdevice_notifier(&dev->netdev_nb);
        ib_unregister_device(&dev->ibdev);

        destroy_workqueue(dev->reflush_wq);
        erdma_res_cb_free(dev);
        xa_destroy(&dev->qp_xa);
        xa_destroy(&dev->cq_xa);
}

static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
        int ret;

        ret = erdma_probe_dev(pdev);
        if (ret)
                return ret;

        ret = erdma_ib_device_add(pdev);
        if (ret) {
                erdma_remove_dev(pdev);
                return ret;
        }

        return 0;
}

static void erdma_remove(struct pci_dev *pdev)
{
        erdma_ib_device_remove(pdev);
        erdma_remove_dev(pdev);
}

static struct pci_driver erdma_pci_driver = {
        .name = DRV_MODULE_NAME,
        .id_table = erdma_pci_tbl,
        .probe = erdma_probe,
        .remove = erdma_remove
};

MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
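
/*
 * Initialize the CM subsystem before registering the PCI driver so that
 * connection management is ready when the first probe runs.
 */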
static __init int erdma_init_module(void)
{
        int ret;

        ret = erdma_cm_init();
        if (ret)
                return ret;

        ret = pci_register_driver(&erdma_pci_driver);
        if (ret)
                erdma_cm_exit();

        return ret;
}

static void __exit erdma_exit_module(void)
{
        pci_unregister_driver(&erdma_pci_driver);

        erdma_cm_exit();
}

module_init(erdma_init_module);
module_exit(erdma_exit_module);