1 // SPDX-License-Identifier: GPL-2.0
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /* Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/ib_umem.h>
16 #include <rdma/uverbs_ioctl.h>
20 #include "erdma_verbs.h"
22 static void assemble_qbuf_mtt_for_cmd(struct erdma_mem
*mem
, u32
*cfg
,
23 u64
*addr0
, u64
*addr1
)
25 struct erdma_mtt
*mtt
= mem
->mtt
;
27 if (mem
->mtt_nents
> ERDMA_MAX_INLINE_MTT_ENTRIES
) {
28 *addr0
= mtt
->buf_dma
;
29 *cfg
|= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK
,
33 memcpy(addr1
, mtt
->buf
+ 1, MTT_SIZE(mem
->mtt_nents
- 1));
34 *cfg
|= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK
,
39 static int create_qp_cmd(struct erdma_ucontext
*uctx
, struct erdma_qp
*qp
)
41 struct erdma_dev
*dev
= to_edev(qp
->ibqp
.device
);
42 struct erdma_pd
*pd
= to_epd(qp
->ibqp
.pd
);
43 struct erdma_cmdq_create_qp_req req
;
44 struct erdma_uqp
*user_qp
;
48 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
49 CMDQ_OPCODE_CREATE_QP
);
51 req
.cfg0
= FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK
,
52 ilog2(qp
->attrs
.sq_size
)) |
53 FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK
, QP_ID(qp
));
54 req
.cfg1
= FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK
,
55 ilog2(qp
->attrs
.rq_size
)) |
56 FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK
, pd
->pdn
);
58 if (rdma_is_kernel_res(&qp
->ibqp
.res
)) {
59 u32 pgsz_range
= ilog2(SZ_1M
) - ERDMA_HW_PAGE_SHIFT
;
62 FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK
,
64 FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK
, qp
->scq
->cqn
);
66 FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK
,
68 FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK
, qp
->rcq
->cqn
);
71 FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK
, 0) |
72 FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK
, 1) |
73 FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK
,
75 req
.rq_mtt_cfg
= req
.sq_mtt_cfg
;
77 req
.rq_buf_addr
= qp
->kern_qp
.rq_buf_dma_addr
;
78 req
.sq_buf_addr
= qp
->kern_qp
.sq_buf_dma_addr
;
79 req
.sq_dbrec_dma
= qp
->kern_qp
.sq_dbrec_dma
;
80 req
.rq_dbrec_dma
= qp
->kern_qp
.rq_dbrec_dma
;
82 user_qp
= &qp
->user_qp
;
83 req
.sq_cqn_mtt_cfg
= FIELD_PREP(
84 ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK
,
85 ilog2(user_qp
->sq_mem
.page_size
) - ERDMA_HW_PAGE_SHIFT
);
87 FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK
, qp
->scq
->cqn
);
89 req
.rq_cqn_mtt_cfg
= FIELD_PREP(
90 ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK
,
91 ilog2(user_qp
->rq_mem
.page_size
) - ERDMA_HW_PAGE_SHIFT
);
93 FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK
, qp
->rcq
->cqn
);
95 req
.sq_mtt_cfg
= user_qp
->sq_mem
.page_offset
;
96 req
.sq_mtt_cfg
|= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK
,
97 user_qp
->sq_mem
.mtt_nents
);
99 req
.rq_mtt_cfg
= user_qp
->rq_mem
.page_offset
;
100 req
.rq_mtt_cfg
|= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK
,
101 user_qp
->rq_mem
.mtt_nents
);
103 assemble_qbuf_mtt_for_cmd(&user_qp
->sq_mem
, &req
.sq_mtt_cfg
,
104 &req
.sq_buf_addr
, req
.sq_mtt_entry
);
105 assemble_qbuf_mtt_for_cmd(&user_qp
->rq_mem
, &req
.rq_mtt_cfg
,
106 &req
.rq_buf_addr
, req
.rq_mtt_entry
);
108 req
.sq_dbrec_dma
= user_qp
->sq_dbrec_dma
;
109 req
.rq_dbrec_dma
= user_qp
->rq_dbrec_dma
;
111 if (uctx
->ext_db
.enable
) {
112 req
.sq_cqn_mtt_cfg
|=
113 FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK
, 1);
115 FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK
,
116 uctx
->ext_db
.sdb_off
) |
117 FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK
,
118 uctx
->ext_db
.rdb_off
);
122 err
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), &resp0
,
126 FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK
, resp0
);
131 static int regmr_cmd(struct erdma_dev
*dev
, struct erdma_mr
*mr
)
133 struct erdma_pd
*pd
= to_epd(mr
->ibmr
.pd
);
134 u32 mtt_level
= ERDMA_MR_MTT_0LEVEL
;
135 struct erdma_cmdq_reg_mr_req req
;
137 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
, CMDQ_OPCODE_REG_MR
);
139 if (mr
->type
== ERDMA_MR_TYPE_FRMR
||
140 mr
->mem
.page_cnt
> ERDMA_MAX_INLINE_MTT_ENTRIES
) {
141 if (mr
->mem
.mtt
->continuous
) {
142 req
.phy_addr
[0] = mr
->mem
.mtt
->buf_dma
;
143 mtt_level
= ERDMA_MR_MTT_1LEVEL
;
145 req
.phy_addr
[0] = sg_dma_address(mr
->mem
.mtt
->sglist
);
146 mtt_level
= mr
->mem
.mtt
->level
;
148 } else if (mr
->type
!= ERDMA_MR_TYPE_DMA
) {
149 memcpy(req
.phy_addr
, mr
->mem
.mtt
->buf
,
150 MTT_SIZE(mr
->mem
.page_cnt
));
153 req
.cfg0
= FIELD_PREP(ERDMA_CMD_MR_VALID_MASK
, mr
->valid
) |
154 FIELD_PREP(ERDMA_CMD_MR_KEY_MASK
, mr
->ibmr
.lkey
& 0xFF) |
155 FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK
, mr
->ibmr
.lkey
>> 8);
156 req
.cfg1
= FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK
, pd
->pdn
) |
157 FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK
, mr
->type
) |
158 FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK
, mr
->access
);
159 req
.cfg2
= FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK
,
160 ilog2(mr
->mem
.page_size
)) |
161 FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK
, mtt_level
) |
162 FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK
, mr
->mem
.page_cnt
);
164 if (mr
->type
== ERDMA_MR_TYPE_DMA
)
167 if (mr
->type
== ERDMA_MR_TYPE_NORMAL
) {
168 req
.start_va
= mr
->mem
.va
;
169 req
.size
= mr
->mem
.len
;
172 if (!mr
->mem
.mtt
->continuous
&& mr
->mem
.mtt
->level
> 1) {
173 req
.cfg0
|= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK
, 1);
174 req
.cfg2
|= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK
,
175 PAGE_SHIFT
- ERDMA_HW_PAGE_SHIFT
);
176 req
.size_h
= upper_32_bits(mr
->mem
.len
);
177 req
.mtt_cnt_h
= mr
->mem
.page_cnt
>> 20;
181 return erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
184 static int create_cq_cmd(struct erdma_ucontext
*uctx
, struct erdma_cq
*cq
)
186 struct erdma_dev
*dev
= to_edev(cq
->ibcq
.device
);
187 struct erdma_cmdq_create_cq_req req
;
188 struct erdma_mem
*mem
;
191 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
192 CMDQ_OPCODE_CREATE_CQ
);
194 req
.cfg0
= FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK
, cq
->cqn
) |
195 FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK
, ilog2(cq
->depth
));
196 req
.cfg1
= FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK
, cq
->assoc_eqn
);
198 if (rdma_is_kernel_res(&cq
->ibcq
.res
)) {
200 req
.cfg0
|= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK
,
201 ilog2(page_size
) - ERDMA_HW_PAGE_SHIFT
);
202 req
.qbuf_addr_l
= lower_32_bits(cq
->kern_cq
.qbuf_dma_addr
);
203 req
.qbuf_addr_h
= upper_32_bits(cq
->kern_cq
.qbuf_dma_addr
);
205 req
.cfg1
|= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK
, 1) |
206 FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK
,
207 ERDMA_MR_MTT_0LEVEL
);
209 req
.first_page_offset
= 0;
210 req
.cq_dbrec_dma
= cq
->kern_cq
.dbrec_dma
;
212 mem
= &cq
->user_cq
.qbuf_mem
;
214 FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK
,
215 ilog2(mem
->page_size
) - ERDMA_HW_PAGE_SHIFT
);
216 if (mem
->mtt_nents
== 1) {
217 req
.qbuf_addr_l
= lower_32_bits(mem
->mtt
->buf
[0]);
218 req
.qbuf_addr_h
= upper_32_bits(mem
->mtt
->buf
[0]);
220 FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK
,
221 ERDMA_MR_MTT_0LEVEL
);
223 req
.qbuf_addr_l
= lower_32_bits(mem
->mtt
->buf_dma
);
224 req
.qbuf_addr_h
= upper_32_bits(mem
->mtt
->buf_dma
);
226 FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK
,
227 ERDMA_MR_MTT_1LEVEL
);
229 req
.cfg1
|= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK
,
232 req
.first_page_offset
= mem
->page_offset
;
233 req
.cq_dbrec_dma
= cq
->user_cq
.dbrec_dma
;
235 if (uctx
->ext_db
.enable
) {
236 req
.cfg1
|= FIELD_PREP(
237 ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK
, 1);
238 req
.cfg2
= FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK
,
239 uctx
->ext_db
.cdb_off
);
243 return erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
246 static int erdma_alloc_idx(struct erdma_resource_cb
*res_cb
)
251 spin_lock_irqsave(&res_cb
->lock
, flags
);
252 idx
= find_next_zero_bit(res_cb
->bitmap
, res_cb
->max_cap
,
253 res_cb
->next_alloc_idx
);
254 if (idx
== res_cb
->max_cap
) {
255 idx
= find_first_zero_bit(res_cb
->bitmap
, res_cb
->max_cap
);
256 if (idx
== res_cb
->max_cap
) {
257 res_cb
->next_alloc_idx
= 1;
258 spin_unlock_irqrestore(&res_cb
->lock
, flags
);
263 set_bit(idx
, res_cb
->bitmap
);
264 res_cb
->next_alloc_idx
= idx
+ 1;
265 spin_unlock_irqrestore(&res_cb
->lock
, flags
);
270 static inline void erdma_free_idx(struct erdma_resource_cb
*res_cb
, u32 idx
)
275 spin_lock_irqsave(&res_cb
->lock
, flags
);
276 used
= __test_and_clear_bit(idx
, res_cb
->bitmap
);
277 spin_unlock_irqrestore(&res_cb
->lock
, flags
);
281 static struct rdma_user_mmap_entry
*
282 erdma_user_mmap_entry_insert(struct erdma_ucontext
*uctx
, void *address
,
283 u32 size
, u8 mmap_flag
, u64
*mmap_offset
)
285 struct erdma_user_mmap_entry
*entry
=
286 kzalloc(sizeof(*entry
), GFP_KERNEL
);
292 entry
->address
= (u64
)address
;
293 entry
->mmap_flag
= mmap_flag
;
295 size
= PAGE_ALIGN(size
);
297 ret
= rdma_user_mmap_entry_insert(&uctx
->ibucontext
, &entry
->rdma_entry
,
304 *mmap_offset
= rdma_user_mmap_get_offset(&entry
->rdma_entry
);
306 return &entry
->rdma_entry
;
309 int erdma_query_device(struct ib_device
*ibdev
, struct ib_device_attr
*attr
,
310 struct ib_udata
*unused
)
312 struct erdma_dev
*dev
= to_edev(ibdev
);
314 memset(attr
, 0, sizeof(*attr
));
316 attr
->max_mr_size
= dev
->attrs
.max_mr_size
;
317 attr
->vendor_id
= PCI_VENDOR_ID_ALIBABA
;
318 attr
->vendor_part_id
= dev
->pdev
->device
;
319 attr
->hw_ver
= dev
->pdev
->revision
;
320 attr
->max_qp
= dev
->attrs
.max_qp
- 1;
321 attr
->max_qp_wr
= min(dev
->attrs
.max_send_wr
, dev
->attrs
.max_recv_wr
);
322 attr
->max_qp_rd_atom
= dev
->attrs
.max_ord
;
323 attr
->max_qp_init_rd_atom
= dev
->attrs
.max_ird
;
324 attr
->max_res_rd_atom
= dev
->attrs
.max_qp
* dev
->attrs
.max_ird
;
325 attr
->device_cap_flags
= IB_DEVICE_MEM_MGT_EXTENSIONS
;
326 attr
->kernel_cap_flags
= IBK_LOCAL_DMA_LKEY
;
327 ibdev
->local_dma_lkey
= dev
->attrs
.local_dma_key
;
328 attr
->max_send_sge
= dev
->attrs
.max_send_sge
;
329 attr
->max_recv_sge
= dev
->attrs
.max_recv_sge
;
330 attr
->max_sge_rd
= dev
->attrs
.max_sge_rd
;
331 attr
->max_cq
= dev
->attrs
.max_cq
- 1;
332 attr
->max_cqe
= dev
->attrs
.max_cqe
;
333 attr
->max_mr
= dev
->attrs
.max_mr
;
334 attr
->max_pd
= dev
->attrs
.max_pd
;
335 attr
->max_mw
= dev
->attrs
.max_mw
;
336 attr
->max_fast_reg_page_list_len
= ERDMA_MAX_FRMR_PA
;
337 attr
->page_size_cap
= ERDMA_PAGE_SIZE_SUPPORT
;
339 if (dev
->attrs
.cap_flags
& ERDMA_DEV_CAP_FLAGS_ATOMIC
)
340 attr
->atomic_cap
= IB_ATOMIC_GLOB
;
342 attr
->fw_ver
= dev
->attrs
.fw_version
;
345 addrconf_addr_eui48((u8
*)&attr
->sys_image_guid
,
346 dev
->netdev
->dev_addr
);
351 int erdma_query_gid(struct ib_device
*ibdev
, u32 port
, int idx
,
354 struct erdma_dev
*dev
= to_edev(ibdev
);
356 memset(gid
, 0, sizeof(*gid
));
357 ether_addr_copy(gid
->raw
, dev
->attrs
.peer_addr
);
362 int erdma_query_port(struct ib_device
*ibdev
, u32 port
,
363 struct ib_port_attr
*attr
)
365 struct erdma_dev
*dev
= to_edev(ibdev
);
366 struct net_device
*ndev
= dev
->netdev
;
368 memset(attr
, 0, sizeof(*attr
));
370 attr
->gid_tbl_len
= 1;
371 attr
->port_cap_flags
= IB_PORT_CM_SUP
| IB_PORT_DEVICE_MGMT_SUP
;
372 attr
->max_msg_sz
= -1;
377 ib_get_eth_speed(ibdev
, port
, &attr
->active_speed
, &attr
->active_width
);
378 attr
->max_mtu
= ib_mtu_int_to_enum(ndev
->mtu
);
379 attr
->active_mtu
= ib_mtu_int_to_enum(ndev
->mtu
);
380 if (netif_running(ndev
) && netif_carrier_ok(ndev
))
381 dev
->state
= IB_PORT_ACTIVE
;
383 dev
->state
= IB_PORT_DOWN
;
384 attr
->state
= dev
->state
;
387 if (dev
->state
== IB_PORT_ACTIVE
)
388 attr
->phys_state
= IB_PORT_PHYS_STATE_LINK_UP
;
390 attr
->phys_state
= IB_PORT_PHYS_STATE_DISABLED
;
395 int erdma_get_port_immutable(struct ib_device
*ibdev
, u32 port
,
396 struct ib_port_immutable
*port_immutable
)
398 port_immutable
->gid_tbl_len
= 1;
399 port_immutable
->core_cap_flags
= RDMA_CORE_PORT_IWARP
;
404 int erdma_alloc_pd(struct ib_pd
*ibpd
, struct ib_udata
*udata
)
406 struct erdma_pd
*pd
= to_epd(ibpd
);
407 struct erdma_dev
*dev
= to_edev(ibpd
->device
);
410 pdn
= erdma_alloc_idx(&dev
->res_cb
[ERDMA_RES_TYPE_PD
]);
419 int erdma_dealloc_pd(struct ib_pd
*ibpd
, struct ib_udata
*udata
)
421 struct erdma_pd
*pd
= to_epd(ibpd
);
422 struct erdma_dev
*dev
= to_edev(ibpd
->device
);
424 erdma_free_idx(&dev
->res_cb
[ERDMA_RES_TYPE_PD
], pd
->pdn
);
429 static void erdma_flush_worker(struct work_struct
*work
)
431 struct delayed_work
*dwork
= to_delayed_work(work
);
432 struct erdma_qp
*qp
=
433 container_of(dwork
, struct erdma_qp
, reflush_dwork
);
434 struct erdma_cmdq_reflush_req req
;
436 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
437 CMDQ_OPCODE_REFLUSH
);
439 req
.sq_pi
= qp
->kern_qp
.sq_pi
;
440 req
.rq_pi
= qp
->kern_qp
.rq_pi
;
441 erdma_post_cmd_wait(&qp
->dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
444 static int erdma_qp_validate_cap(struct erdma_dev
*dev
,
445 struct ib_qp_init_attr
*attrs
)
447 if ((attrs
->cap
.max_send_wr
> dev
->attrs
.max_send_wr
) ||
448 (attrs
->cap
.max_recv_wr
> dev
->attrs
.max_recv_wr
) ||
449 (attrs
->cap
.max_send_sge
> dev
->attrs
.max_send_sge
) ||
450 (attrs
->cap
.max_recv_sge
> dev
->attrs
.max_recv_sge
) ||
451 (attrs
->cap
.max_inline_data
> ERDMA_MAX_INLINE
) ||
452 !attrs
->cap
.max_send_wr
|| !attrs
->cap
.max_recv_wr
) {
459 static int erdma_qp_validate_attr(struct erdma_dev
*dev
,
460 struct ib_qp_init_attr
*attrs
)
462 if (attrs
->qp_type
!= IB_QPT_RC
)
468 if (!attrs
->send_cq
|| !attrs
->recv_cq
)
474 static void free_kernel_qp(struct erdma_qp
*qp
)
476 struct erdma_dev
*dev
= qp
->dev
;
478 vfree(qp
->kern_qp
.swr_tbl
);
479 vfree(qp
->kern_qp
.rwr_tbl
);
481 if (qp
->kern_qp
.sq_buf
)
482 dma_free_coherent(&dev
->pdev
->dev
,
483 qp
->attrs
.sq_size
<< SQEBB_SHIFT
,
485 qp
->kern_qp
.sq_buf_dma_addr
);
487 if (qp
->kern_qp
.sq_dbrec
)
488 dma_pool_free(dev
->db_pool
, qp
->kern_qp
.sq_dbrec
,
489 qp
->kern_qp
.sq_dbrec_dma
);
491 if (qp
->kern_qp
.rq_buf
)
492 dma_free_coherent(&dev
->pdev
->dev
,
493 qp
->attrs
.rq_size
<< RQE_SHIFT
,
495 qp
->kern_qp
.rq_buf_dma_addr
);
497 if (qp
->kern_qp
.rq_dbrec
)
498 dma_pool_free(dev
->db_pool
, qp
->kern_qp
.rq_dbrec
,
499 qp
->kern_qp
.rq_dbrec_dma
);
502 static int init_kernel_qp(struct erdma_dev
*dev
, struct erdma_qp
*qp
,
503 struct ib_qp_init_attr
*attrs
)
505 struct erdma_kqp
*kqp
= &qp
->kern_qp
;
508 if (attrs
->sq_sig_type
== IB_SIGNAL_ALL_WR
)
516 dev
->func_bar
+ (ERDMA_SDB_SHARED_PAGE_INDEX
<< PAGE_SHIFT
);
517 kqp
->hw_rq_db
= dev
->func_bar
+ ERDMA_BAR_RQDB_SPACE_OFFSET
;
519 kqp
->swr_tbl
= vmalloc_array(qp
->attrs
.sq_size
, sizeof(u64
));
520 kqp
->rwr_tbl
= vmalloc_array(qp
->attrs
.rq_size
, sizeof(u64
));
521 if (!kqp
->swr_tbl
|| !kqp
->rwr_tbl
)
524 size
= qp
->attrs
.sq_size
<< SQEBB_SHIFT
;
525 kqp
->sq_buf
= dma_alloc_coherent(&dev
->pdev
->dev
, size
,
526 &kqp
->sq_buf_dma_addr
, GFP_KERNEL
);
531 dma_pool_zalloc(dev
->db_pool
, GFP_KERNEL
, &kqp
->sq_dbrec_dma
);
535 size
= qp
->attrs
.rq_size
<< RQE_SHIFT
;
536 kqp
->rq_buf
= dma_alloc_coherent(&dev
->pdev
->dev
, size
,
537 &kqp
->rq_buf_dma_addr
, GFP_KERNEL
);
542 dma_pool_zalloc(dev
->db_pool
, GFP_KERNEL
, &kqp
->rq_dbrec_dma
);
553 static void erdma_fill_bottom_mtt(struct erdma_dev
*dev
, struct erdma_mem
*mem
)
555 struct erdma_mtt
*mtt
= mem
->mtt
;
556 struct ib_block_iter biter
;
559 while (mtt
->low_level
)
560 mtt
= mtt
->low_level
;
562 rdma_umem_for_each_dma_block(mem
->umem
, &biter
, mem
->page_size
)
563 mtt
->buf
[idx
++] = rdma_block_iter_dma_address(&biter
);
566 static struct erdma_mtt
*erdma_create_cont_mtt(struct erdma_dev
*dev
,
569 struct erdma_mtt
*mtt
;
571 mtt
= kzalloc(sizeof(*mtt
), GFP_KERNEL
);
573 return ERR_PTR(-ENOMEM
);
576 mtt
->buf
= kzalloc(mtt
->size
, GFP_KERNEL
);
580 mtt
->continuous
= true;
581 mtt
->buf_dma
= dma_map_single(&dev
->pdev
->dev
, mtt
->buf
, mtt
->size
,
583 if (dma_mapping_error(&dev
->pdev
->dev
, mtt
->buf_dma
))
584 goto err_free_mtt_buf
;
594 return ERR_PTR(-ENOMEM
);
597 static void erdma_destroy_mtt_buf_sg(struct erdma_dev
*dev
,
598 struct erdma_mtt
*mtt
)
600 dma_unmap_sg(&dev
->pdev
->dev
, mtt
->sglist
, mtt
->nsg
, DMA_TO_DEVICE
);
604 static void erdma_destroy_scatter_mtt(struct erdma_dev
*dev
,
605 struct erdma_mtt
*mtt
)
607 erdma_destroy_mtt_buf_sg(dev
, mtt
);
612 static void erdma_init_middle_mtt(struct erdma_mtt
*mtt
,
613 struct erdma_mtt
*low_mtt
)
615 struct scatterlist
*sg
;
618 for_each_sg(low_mtt
->sglist
, sg
, low_mtt
->nsg
, i
)
619 mtt
->buf
[idx
++] = sg_dma_address(sg
);
622 static int erdma_create_mtt_buf_sg(struct erdma_dev
*dev
, struct erdma_mtt
*mtt
)
624 struct scatterlist
*sglist
;
625 void *buf
= mtt
->buf
;
629 /* Failed if buf is not page aligned */
630 if ((uintptr_t)buf
& ~PAGE_MASK
)
633 npages
= DIV_ROUND_UP(mtt
->size
, PAGE_SIZE
);
634 sglist
= vzalloc(npages
* sizeof(*sglist
));
638 sg_init_table(sglist
, npages
);
639 for (i
= 0; i
< npages
; i
++) {
640 pg
= vmalloc_to_page(buf
);
643 sg_set_page(&sglist
[i
], pg
, PAGE_SIZE
, 0);
647 nsg
= dma_map_sg(&dev
->pdev
->dev
, sglist
, npages
, DMA_TO_DEVICE
);
651 mtt
->sglist
= sglist
;
661 static struct erdma_mtt
*erdma_create_scatter_mtt(struct erdma_dev
*dev
,
664 struct erdma_mtt
*mtt
;
667 mtt
= kzalloc(sizeof(*mtt
), GFP_KERNEL
);
669 return ERR_PTR(-ENOMEM
);
671 mtt
->size
= ALIGN(size
, PAGE_SIZE
);
672 mtt
->buf
= vzalloc(mtt
->size
);
673 mtt
->continuous
= false;
677 ret
= erdma_create_mtt_buf_sg(dev
, mtt
);
679 goto err_free_mtt_buf
;
681 ibdev_dbg(&dev
->ibdev
, "create scatter mtt, size:%lu, nsg:%u\n",
682 mtt
->size
, mtt
->nsg
);
695 static struct erdma_mtt
*erdma_create_mtt(struct erdma_dev
*dev
, size_t size
,
696 bool force_continuous
)
698 struct erdma_mtt
*mtt
, *tmp_mtt
;
701 ibdev_dbg(&dev
->ibdev
, "create_mtt, size:%lu, force cont:%d\n", size
,
704 if (!(dev
->attrs
.cap_flags
& ERDMA_DEV_CAP_FLAGS_MTT_VA
))
705 force_continuous
= true;
707 if (force_continuous
)
708 return erdma_create_cont_mtt(dev
, size
);
710 mtt
= erdma_create_scatter_mtt(dev
, size
);
715 /* convergence the mtt table. */
716 while (mtt
->nsg
!= 1 && level
<= 3) {
717 tmp_mtt
= erdma_create_scatter_mtt(dev
, MTT_SIZE(mtt
->nsg
));
718 if (IS_ERR(tmp_mtt
)) {
719 ret
= PTR_ERR(tmp_mtt
);
722 erdma_init_middle_mtt(tmp_mtt
, mtt
);
723 tmp_mtt
->low_level
= mtt
;
734 ibdev_dbg(&dev
->ibdev
, "top mtt: level:%d, dma_addr 0x%llx\n",
735 mtt
->level
, mtt
->sglist
[0].dma_address
);
740 tmp_mtt
= mtt
->low_level
;
741 erdma_destroy_scatter_mtt(dev
, mtt
);
748 static void erdma_destroy_mtt(struct erdma_dev
*dev
, struct erdma_mtt
*mtt
)
750 struct erdma_mtt
*tmp_mtt
;
752 if (mtt
->continuous
) {
753 dma_unmap_single(&dev
->pdev
->dev
, mtt
->buf_dma
, mtt
->size
,
759 tmp_mtt
= mtt
->low_level
;
760 erdma_destroy_scatter_mtt(dev
, mtt
);
766 static int get_mtt_entries(struct erdma_dev
*dev
, struct erdma_mem
*mem
,
767 u64 start
, u64 len
, int access
, u64 virt
,
768 unsigned long req_page_size
, bool force_continuous
)
772 mem
->umem
= ib_umem_get(&dev
->ibdev
, start
, len
, access
);
773 if (IS_ERR(mem
->umem
)) {
774 ret
= PTR_ERR(mem
->umem
);
781 mem
->page_size
= ib_umem_find_best_pgsz(mem
->umem
, req_page_size
, virt
);
782 mem
->page_offset
= start
& (mem
->page_size
- 1);
783 mem
->mtt_nents
= ib_umem_num_dma_blocks(mem
->umem
, mem
->page_size
);
784 mem
->page_cnt
= mem
->mtt_nents
;
785 mem
->mtt
= erdma_create_mtt(dev
, MTT_SIZE(mem
->page_cnt
),
787 if (IS_ERR(mem
->mtt
)) {
788 ret
= PTR_ERR(mem
->mtt
);
792 erdma_fill_bottom_mtt(dev
, mem
);
798 ib_umem_release(mem
->umem
);
805 static void put_mtt_entries(struct erdma_dev
*dev
, struct erdma_mem
*mem
)
808 erdma_destroy_mtt(dev
, mem
->mtt
);
811 ib_umem_release(mem
->umem
);
816 static int erdma_map_user_dbrecords(struct erdma_ucontext
*ctx
,
818 struct erdma_user_dbrecords_page
**dbr_page
,
819 dma_addr_t
*dma_addr
)
821 struct erdma_user_dbrecords_page
*page
= NULL
;
824 mutex_lock(&ctx
->dbrecords_page_mutex
);
826 list_for_each_entry(page
, &ctx
->dbrecords_page_list
, list
)
827 if (page
->va
== (dbrecords_va
& PAGE_MASK
))
830 page
= kmalloc(sizeof(*page
), GFP_KERNEL
);
836 page
->va
= (dbrecords_va
& PAGE_MASK
);
839 page
->umem
= ib_umem_get(ctx
->ibucontext
.device
,
840 dbrecords_va
& PAGE_MASK
, PAGE_SIZE
, 0);
841 if (IS_ERR(page
->umem
)) {
842 rv
= PTR_ERR(page
->umem
);
847 list_add(&page
->list
, &ctx
->dbrecords_page_list
);
850 *dma_addr
= sg_dma_address(page
->umem
->sgt_append
.sgt
.sgl
) +
851 (dbrecords_va
& ~PAGE_MASK
);
856 mutex_unlock(&ctx
->dbrecords_page_mutex
);
861 erdma_unmap_user_dbrecords(struct erdma_ucontext
*ctx
,
862 struct erdma_user_dbrecords_page
**dbr_page
)
864 if (!ctx
|| !(*dbr_page
))
867 mutex_lock(&ctx
->dbrecords_page_mutex
);
868 if (--(*dbr_page
)->refcnt
== 0) {
869 list_del(&(*dbr_page
)->list
);
870 ib_umem_release((*dbr_page
)->umem
);
875 mutex_unlock(&ctx
->dbrecords_page_mutex
);
878 static int init_user_qp(struct erdma_qp
*qp
, struct erdma_ucontext
*uctx
,
879 u64 va
, u32 len
, u64 dbrec_va
)
881 dma_addr_t dbrec_dma
;
885 if (len
< (ALIGN(qp
->attrs
.sq_size
* SQEBB_SIZE
, ERDMA_HW_PAGE_SIZE
) +
886 qp
->attrs
.rq_size
* RQE_SIZE
))
889 ret
= get_mtt_entries(qp
->dev
, &qp
->user_qp
.sq_mem
, va
,
890 qp
->attrs
.sq_size
<< SQEBB_SHIFT
, 0, va
,
891 (SZ_1M
- SZ_4K
), true);
895 rq_offset
= ALIGN(qp
->attrs
.sq_size
<< SQEBB_SHIFT
, ERDMA_HW_PAGE_SIZE
);
896 qp
->user_qp
.rq_offset
= rq_offset
;
898 ret
= get_mtt_entries(qp
->dev
, &qp
->user_qp
.rq_mem
, va
+ rq_offset
,
899 qp
->attrs
.rq_size
<< RQE_SHIFT
, 0, va
+ rq_offset
,
900 (SZ_1M
- SZ_4K
), true);
904 ret
= erdma_map_user_dbrecords(uctx
, dbrec_va
,
905 &qp
->user_qp
.user_dbr_page
,
910 qp
->user_qp
.sq_dbrec_dma
= dbrec_dma
;
911 qp
->user_qp
.rq_dbrec_dma
= dbrec_dma
+ ERDMA_DB_SIZE
;
916 put_mtt_entries(qp
->dev
, &qp
->user_qp
.rq_mem
);
919 put_mtt_entries(qp
->dev
, &qp
->user_qp
.sq_mem
);
924 static void free_user_qp(struct erdma_qp
*qp
, struct erdma_ucontext
*uctx
)
926 put_mtt_entries(qp
->dev
, &qp
->user_qp
.sq_mem
);
927 put_mtt_entries(qp
->dev
, &qp
->user_qp
.rq_mem
);
928 erdma_unmap_user_dbrecords(uctx
, &qp
->user_qp
.user_dbr_page
);
931 int erdma_create_qp(struct ib_qp
*ibqp
, struct ib_qp_init_attr
*attrs
,
932 struct ib_udata
*udata
)
934 struct erdma_qp
*qp
= to_eqp(ibqp
);
935 struct erdma_dev
*dev
= to_edev(ibqp
->device
);
936 struct erdma_ucontext
*uctx
= rdma_udata_to_drv_context(
937 udata
, struct erdma_ucontext
, ibucontext
);
938 struct erdma_ureq_create_qp ureq
;
939 struct erdma_uresp_create_qp uresp
;
942 ret
= erdma_qp_validate_cap(dev
, attrs
);
946 ret
= erdma_qp_validate_attr(dev
, attrs
);
950 qp
->scq
= to_ecq(attrs
->send_cq
);
951 qp
->rcq
= to_ecq(attrs
->recv_cq
);
953 qp
->attrs
.cc
= dev
->attrs
.cc
;
955 init_rwsem(&qp
->state_lock
);
957 init_completion(&qp
->safe_free
);
959 ret
= xa_alloc_cyclic(&dev
->qp_xa
, &qp
->ibqp
.qp_num
, qp
,
960 XA_LIMIT(1, dev
->attrs
.max_qp
- 1),
961 &dev
->next_alloc_qpn
, GFP_KERNEL
);
967 qp
->attrs
.sq_size
= roundup_pow_of_two(attrs
->cap
.max_send_wr
*
968 ERDMA_MAX_WQEBB_PER_SQE
);
969 qp
->attrs
.rq_size
= roundup_pow_of_two(attrs
->cap
.max_recv_wr
);
972 ret
= ib_copy_from_udata(&ureq
, udata
,
973 min(sizeof(ureq
), udata
->inlen
));
977 ret
= init_user_qp(qp
, uctx
, ureq
.qbuf_va
, ureq
.qbuf_len
,
982 memset(&uresp
, 0, sizeof(uresp
));
984 uresp
.num_sqe
= qp
->attrs
.sq_size
;
985 uresp
.num_rqe
= qp
->attrs
.rq_size
;
986 uresp
.qp_id
= QP_ID(qp
);
987 uresp
.rq_offset
= qp
->user_qp
.rq_offset
;
989 ret
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
993 init_kernel_qp(dev
, qp
, attrs
);
996 qp
->attrs
.max_send_sge
= attrs
->cap
.max_send_sge
;
997 qp
->attrs
.max_recv_sge
= attrs
->cap
.max_recv_sge
;
998 qp
->attrs
.state
= ERDMA_QP_STATE_IDLE
;
999 INIT_DELAYED_WORK(&qp
->reflush_dwork
, erdma_flush_worker
);
1001 ret
= create_qp_cmd(uctx
, qp
);
1005 spin_lock_init(&qp
->lock
);
1011 free_user_qp(qp
, uctx
);
1015 xa_erase(&dev
->qp_xa
, QP_ID(qp
));
1020 static int erdma_create_stag(struct erdma_dev
*dev
, u32
*stag
)
1024 stag_idx
= erdma_alloc_idx(&dev
->res_cb
[ERDMA_RES_TYPE_STAG_IDX
]);
1028 /* For now, we always let key field be zero. */
1029 *stag
= (stag_idx
<< 8);
1034 struct ib_mr
*erdma_get_dma_mr(struct ib_pd
*ibpd
, int acc
)
1036 struct erdma_dev
*dev
= to_edev(ibpd
->device
);
1037 struct erdma_mr
*mr
;
1041 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
1043 return ERR_PTR(-ENOMEM
);
1045 ret
= erdma_create_stag(dev
, &stag
);
1049 mr
->type
= ERDMA_MR_TYPE_DMA
;
1051 mr
->ibmr
.lkey
= stag
;
1052 mr
->ibmr
.rkey
= stag
;
1054 mr
->access
= ERDMA_MR_ACC_LR
| to_erdma_access_flags(acc
);
1055 ret
= regmr_cmd(dev
, mr
);
1057 goto out_remove_stag
;
1062 erdma_free_idx(&dev
->res_cb
[ERDMA_RES_TYPE_STAG_IDX
],
1063 mr
->ibmr
.lkey
>> 8);
1068 return ERR_PTR(ret
);
1071 struct ib_mr
*erdma_ib_alloc_mr(struct ib_pd
*ibpd
, enum ib_mr_type mr_type
,
1074 struct erdma_mr
*mr
;
1075 struct erdma_dev
*dev
= to_edev(ibpd
->device
);
1079 if (mr_type
!= IB_MR_TYPE_MEM_REG
)
1080 return ERR_PTR(-EOPNOTSUPP
);
1082 if (max_num_sg
> ERDMA_MR_MAX_MTT_CNT
)
1083 return ERR_PTR(-EINVAL
);
1085 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
1087 return ERR_PTR(-ENOMEM
);
1089 ret
= erdma_create_stag(dev
, &stag
);
1093 mr
->type
= ERDMA_MR_TYPE_FRMR
;
1095 mr
->ibmr
.lkey
= stag
;
1096 mr
->ibmr
.rkey
= stag
;
1098 /* update it in FRMR. */
1099 mr
->access
= ERDMA_MR_ACC_LR
| ERDMA_MR_ACC_LW
| ERDMA_MR_ACC_RR
|
1102 mr
->mem
.page_size
= PAGE_SIZE
; /* update it later. */
1103 mr
->mem
.page_cnt
= max_num_sg
;
1104 mr
->mem
.mtt
= erdma_create_mtt(dev
, MTT_SIZE(max_num_sg
), true);
1105 if (IS_ERR(mr
->mem
.mtt
)) {
1106 ret
= PTR_ERR(mr
->mem
.mtt
);
1107 goto out_remove_stag
;
1110 ret
= regmr_cmd(dev
, mr
);
1112 goto out_destroy_mtt
;
1117 erdma_destroy_mtt(dev
, mr
->mem
.mtt
);
1120 erdma_free_idx(&dev
->res_cb
[ERDMA_RES_TYPE_STAG_IDX
],
1121 mr
->ibmr
.lkey
>> 8);
1126 return ERR_PTR(ret
);
1129 static int erdma_set_page(struct ib_mr
*ibmr
, u64 addr
)
1131 struct erdma_mr
*mr
= to_emr(ibmr
);
1133 if (mr
->mem
.mtt_nents
>= mr
->mem
.page_cnt
)
1136 mr
->mem
.mtt
->buf
[mr
->mem
.mtt_nents
] = addr
;
1137 mr
->mem
.mtt_nents
++;
1142 int erdma_map_mr_sg(struct ib_mr
*ibmr
, struct scatterlist
*sg
, int sg_nents
,
1143 unsigned int *sg_offset
)
1145 struct erdma_mr
*mr
= to_emr(ibmr
);
1148 mr
->mem
.mtt_nents
= 0;
1150 num
= ib_sg_to_pages(&mr
->ibmr
, sg
, sg_nents
, sg_offset
,
1156 struct ib_mr
*erdma_reg_user_mr(struct ib_pd
*ibpd
, u64 start
, u64 len
,
1157 u64 virt
, int access
, struct ib_udata
*udata
)
1159 struct erdma_mr
*mr
= NULL
;
1160 struct erdma_dev
*dev
= to_edev(ibpd
->device
);
1164 if (!len
|| len
> dev
->attrs
.max_mr_size
)
1165 return ERR_PTR(-EINVAL
);
1167 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
1169 return ERR_PTR(-ENOMEM
);
1171 ret
= get_mtt_entries(dev
, &mr
->mem
, start
, len
, access
, virt
,
1172 SZ_2G
- SZ_4K
, false);
1176 ret
= erdma_create_stag(dev
, &stag
);
1178 goto err_out_put_mtt
;
1180 mr
->ibmr
.lkey
= mr
->ibmr
.rkey
= stag
;
1184 mr
->access
= ERDMA_MR_ACC_LR
| to_erdma_access_flags(access
);
1186 mr
->type
= ERDMA_MR_TYPE_NORMAL
;
1188 ret
= regmr_cmd(dev
, mr
);
1195 erdma_free_idx(&dev
->res_cb
[ERDMA_RES_TYPE_STAG_IDX
],
1196 mr
->ibmr
.lkey
>> 8);
1199 put_mtt_entries(dev
, &mr
->mem
);
1204 return ERR_PTR(ret
);
1207 int erdma_dereg_mr(struct ib_mr
*ibmr
, struct ib_udata
*udata
)
1209 struct erdma_mr
*mr
;
1210 struct erdma_dev
*dev
= to_edev(ibmr
->device
);
1211 struct erdma_cmdq_dereg_mr_req req
;
1216 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
1217 CMDQ_OPCODE_DEREG_MR
);
1219 req
.cfg
= FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK
, ibmr
->lkey
>> 8) |
1220 FIELD_PREP(ERDMA_CMD_MR_KEY_MASK
, ibmr
->lkey
& 0xFF);
1222 ret
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
1226 erdma_free_idx(&dev
->res_cb
[ERDMA_RES_TYPE_STAG_IDX
], ibmr
->lkey
>> 8);
1228 put_mtt_entries(dev
, &mr
->mem
);
1234 int erdma_destroy_cq(struct ib_cq
*ibcq
, struct ib_udata
*udata
)
1236 struct erdma_cq
*cq
= to_ecq(ibcq
);
1237 struct erdma_dev
*dev
= to_edev(ibcq
->device
);
1238 struct erdma_ucontext
*ctx
= rdma_udata_to_drv_context(
1239 udata
, struct erdma_ucontext
, ibucontext
);
1241 struct erdma_cmdq_destroy_cq_req req
;
1243 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
1244 CMDQ_OPCODE_DESTROY_CQ
);
1247 err
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
1251 if (rdma_is_kernel_res(&cq
->ibcq
.res
)) {
1252 dma_free_coherent(&dev
->pdev
->dev
, cq
->depth
<< CQE_SHIFT
,
1253 cq
->kern_cq
.qbuf
, cq
->kern_cq
.qbuf_dma_addr
);
1254 dma_pool_free(dev
->db_pool
, cq
->kern_cq
.dbrec
,
1255 cq
->kern_cq
.dbrec_dma
);
1257 erdma_unmap_user_dbrecords(ctx
, &cq
->user_cq
.user_dbr_page
);
1258 put_mtt_entries(dev
, &cq
->user_cq
.qbuf_mem
);
1261 xa_erase(&dev
->cq_xa
, cq
->cqn
);
1266 int erdma_destroy_qp(struct ib_qp
*ibqp
, struct ib_udata
*udata
)
1268 struct erdma_qp
*qp
= to_eqp(ibqp
);
1269 struct erdma_dev
*dev
= to_edev(ibqp
->device
);
1270 struct erdma_ucontext
*ctx
= rdma_udata_to_drv_context(
1271 udata
, struct erdma_ucontext
, ibucontext
);
1272 struct erdma_qp_attrs qp_attrs
;
1274 struct erdma_cmdq_destroy_qp_req req
;
1276 down_write(&qp
->state_lock
);
1277 qp_attrs
.state
= ERDMA_QP_STATE_ERROR
;
1278 erdma_modify_qp_internal(qp
, &qp_attrs
, ERDMA_QP_ATTR_STATE
);
1279 up_write(&qp
->state_lock
);
1281 cancel_delayed_work_sync(&qp
->reflush_dwork
);
1283 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_RDMA
,
1284 CMDQ_OPCODE_DESTROY_QP
);
1285 req
.qpn
= QP_ID(qp
);
1287 err
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
1292 wait_for_completion(&qp
->safe_free
);
1294 if (rdma_is_kernel_res(&qp
->ibqp
.res
)) {
1297 put_mtt_entries(dev
, &qp
->user_qp
.sq_mem
);
1298 put_mtt_entries(dev
, &qp
->user_qp
.rq_mem
);
1299 erdma_unmap_user_dbrecords(ctx
, &qp
->user_qp
.user_dbr_page
);
1303 erdma_cep_put(qp
->cep
);
1304 xa_erase(&dev
->qp_xa
, QP_ID(qp
));
1309 void erdma_qp_get_ref(struct ib_qp
*ibqp
)
1311 erdma_qp_get(to_eqp(ibqp
));
1314 void erdma_qp_put_ref(struct ib_qp
*ibqp
)
1316 erdma_qp_put(to_eqp(ibqp
));
1319 int erdma_mmap(struct ib_ucontext
*ctx
, struct vm_area_struct
*vma
)
1321 struct rdma_user_mmap_entry
*rdma_entry
;
1322 struct erdma_user_mmap_entry
*entry
;
1326 rdma_entry
= rdma_user_mmap_entry_get(ctx
, vma
);
1330 entry
= to_emmap(rdma_entry
);
1332 switch (entry
->mmap_flag
) {
1333 case ERDMA_MMAP_IO_NC
:
1335 prot
= pgprot_device(vma
->vm_page_prot
);
1342 err
= rdma_user_mmap_io(ctx
, vma
, PFN_DOWN(entry
->address
), PAGE_SIZE
,
1346 rdma_user_mmap_entry_put(rdma_entry
);
1350 void erdma_mmap_free(struct rdma_user_mmap_entry
*rdma_entry
)
1352 struct erdma_user_mmap_entry
*entry
= to_emmap(rdma_entry
);
/*
 * Choose the SQ/RQ/CQ doorbell BAR addresses for a user context.
 *
 * Two sets of assignments exist: the fixed ERDMA_BAR_*DB_SPACE_OFFSET
 * locations, and per-context pages whose page offsets are returned by a
 * CMDQ_OPCODE_ALLOC_DB command and recorded in ctx->ext_db (hence the
 * << PAGE_SHIFT when turning them into addresses).
 *
 * @dev: device whose func_bar_addr and command queue are used
 * @ctx: user context receiving sdb/rdb/cdb and the ext_db bookkeeping
 *
 * Fix: the CQ doorbell address used to be computed from ext_db.rdb_off and
 * the RQ doorbell address from ext_db.cdb_off. Each address is now derived
 * from the offset of its own queue type, consistent with the fixed mapping
 * below and with the offsets free_db_resources() returns to the device.
 * NOTE(review): confirm against the device ABI that the crossed pairing was
 * not a deliberate hardware workaround.
 */
1357 static int alloc_db_resources(struct erdma_dev
*dev
, struct erdma_ucontext
*ctx
,
1360 struct erdma_cmdq_ext_db_req req
= {};
1365 * CAP_SYS_RAWIO is required if hardware does not support extend
1366 * doorbell mechanism.
1368 if (!ext_db_en
&& !capable(CAP_SYS_RAWIO
))
/* Fixed doorbell locations inside the function BAR. */
1372 ctx
->sdb
= dev
->func_bar_addr
+ ERDMA_BAR_SQDB_SPACE_OFFSET
;
1373 ctx
->rdb
= dev
->func_bar_addr
+ ERDMA_BAR_RQDB_SPACE_OFFSET
;
1374 ctx
->cdb
= dev
->func_bar_addr
+ ERDMA_BAR_CQDB_SPACE_OFFSET
;
/* Ask the device for extended doorbells of all three kinds. */
1378 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_COMMON
,
1379 CMDQ_OPCODE_ALLOC_DB
);
1381 req
.cfg
= FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK
, 1) |
1382 FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK
, 1) |
1383 FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK
, 1);
1385 ret
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), &val0
, &val1
);
/* Remember the granted offsets; free_db_resources() sends them back. */
1389 ctx
->ext_db
.enable
= true;
1390 ctx
->ext_db
.sdb_off
= ERDMA_GET(val0
, ALLOC_DB_RESP_SDB
);
1391 ctx
->ext_db
.rdb_off
= ERDMA_GET(val0
, ALLOC_DB_RESP_RDB
);
1392 ctx
->ext_db
.cdb_off
= ERDMA_GET(val0
, ALLOC_DB_RESP_CDB
);
/* Offsets are in pages; each doorbell uses the offset of its own type. */
1394 ctx
->sdb
= dev
->func_bar_addr
+ (ctx
->ext_db
.sdb_off
<< PAGE_SHIFT
);
1395 ctx
->cdb
= dev
->func_bar_addr
+ (ctx
->ext_db
.cdb_off
<< PAGE_SHIFT
);
1396 ctx
->rdb
= dev
->func_bar_addr
+ (ctx
->ext_db
.rdb_off
<< PAGE_SHIFT
);
/*
 * Give the extended doorbell offsets recorded in ctx->ext_db back to the
 * device with a CMDQ_OPCODE_FREE_DB command.
 *
 * @dev: device whose command queue is used
 * @ctx: user context holding the ext_db offsets
 */
1401 static void free_db_resources(struct erdma_dev
*dev
, struct erdma_ucontext
*ctx
)
1403 struct erdma_cmdq_ext_db_req req
= {};
/* Only contexts with extended doorbells enabled are relevant here. */
1406 if (!ctx
->ext_db
.enable
)
1409 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_COMMON
,
1410 CMDQ_OPCODE_FREE_DB
);
/* Same three enable bits that the allocation request sets. */
1412 req
.cfg
= FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK
, 1) |
1413 FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK
, 1) |
1414 FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK
, 1);
/* Identify the doorbells by the offsets stored for this context. */
1416 req
.sdb_off
= ctx
->ext_db
.sdb_off
;
1417 req
.rdb_off
= ctx
->ext_db
.rdb_off
;
1418 req
.cdb_off
= ctx
->ext_db
.cdb_off
;
/* No response words are requested (both output pointers are NULL). */
1420 ret
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
/* Rate-limited error log carrying the command's return code. */
1422 ibdev_err_ratelimited(&dev
->ibdev
,
1423 "free db resources failed %d", ret
);
1426 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext
*uctx
)
1428 rdma_user_mmap_entry_remove(uctx
->sq_db_mmap_entry
);
1429 rdma_user_mmap_entry_remove(uctx
->rq_db_mmap_entry
);
1430 rdma_user_mmap_entry_remove(uctx
->cq_db_mmap_entry
);
/*
 * Set up a new user context.
 *
 * Visible steps: count the context against ERDMA_MAX_CONTEXT, check the
 * size of the user response buffer, initialise the doorbell-record page
 * list and its mutex, pick doorbell addresses with alloc_db_resources(),
 * register one mmap entry per doorbell page (SQ, RQ, CQ), and copy uresp
 * (including the PCI device id) back to user space.
 *
 * @ibctx: core user context embedded in the driver's erdma_ucontext
 * @udata: user data buffer that receives struct erdma_uresp_alloc_ctx
 */
1433 int erdma_alloc_ucontext(struct ib_ucontext
*ibctx
, struct ib_udata
*udata
)
1435 struct erdma_ucontext
*ctx
= to_ectx(ibctx
);
1436 struct erdma_dev
*dev
= to_edev(ibctx
->device
);
1438 struct erdma_uresp_alloc_ctx uresp
= {};
/* Count this context first, then compare against the per-device limit. */
1440 if (atomic_inc_return(&dev
->num_ctx
) > ERDMA_MAX_CONTEXT
) {
/* The user's output buffer must be able to hold the whole response. */
1445 if (udata
->outlen
< sizeof(uresp
)) {
1450 INIT_LIST_HEAD(&ctx
->dbrecords_page_list
);
1451 mutex_init(&ctx
->dbrecords_page_mutex
);
/* Third argument: whether the device advertises extended doorbells. */
1453 ret
= alloc_db_resources(dev
, ctx
,
1454 !!(dev
->attrs
.cap_flags
&
1455 ERDMA_DEV_CAP_FLAGS_EXTEND_DB
));
/*
 * One PAGE_SIZE, ERDMA_MMAP_IO_NC entry per doorbell address; the insert
 * helper is also handed the matching uresp field.
 */
1459 ctx
->sq_db_mmap_entry
= erdma_user_mmap_entry_insert(
1460 ctx
, (void *)ctx
->sdb
, PAGE_SIZE
, ERDMA_MMAP_IO_NC
, &uresp
.sdb
);
1461 if (!ctx
->sq_db_mmap_entry
) {
1463 goto err_free_ext_db
;
1466 ctx
->rq_db_mmap_entry
= erdma_user_mmap_entry_insert(
1467 ctx
, (void *)ctx
->rdb
, PAGE_SIZE
, ERDMA_MMAP_IO_NC
, &uresp
.rdb
);
1468 if (!ctx
->rq_db_mmap_entry
) {
1470 goto err_put_mmap_entries
;
1473 ctx
->cq_db_mmap_entry
= erdma_user_mmap_entry_insert(
1474 ctx
, (void *)ctx
->cdb
, PAGE_SIZE
, ERDMA_MMAP_IO_NC
, &uresp
.cdb
);
1475 if (!ctx
->cq_db_mmap_entry
) {
1477 goto err_put_mmap_entries
;
/* Report the PCI device id to user space as well. */
1480 uresp
.dev_id
= dev
->pdev
->device
;
1482 ret
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
1484 goto err_put_mmap_entries
;
/* Error unwinding: mmap entries, then doorbells, then the context count. */
1488 err_put_mmap_entries
:
1489 erdma_uctx_user_mmap_entries_remove(ctx
);
1492 free_db_resources(dev
, ctx
);
1495 atomic_dec(&dev
->num_ctx
);
1499 void erdma_dealloc_ucontext(struct ib_ucontext
*ibctx
)
1501 struct erdma_dev
*dev
= to_edev(ibctx
->device
);
1502 struct erdma_ucontext
*ctx
= to_ectx(ibctx
);
1504 erdma_uctx_user_mmap_entries_remove(ctx
);
1505 free_db_resources(dev
, ctx
);
1506 atomic_dec(&dev
->num_ctx
);
1509 static int ib_qp_state_to_erdma_qp_state
[IB_QPS_ERR
+ 1] = {
1510 [IB_QPS_RESET
] = ERDMA_QP_STATE_IDLE
,
1511 [IB_QPS_INIT
] = ERDMA_QP_STATE_IDLE
,
1512 [IB_QPS_RTR
] = ERDMA_QP_STATE_RTR
,
1513 [IB_QPS_RTS
] = ERDMA_QP_STATE_RTS
,
1514 [IB_QPS_SQD
] = ERDMA_QP_STATE_CLOSING
,
1515 [IB_QPS_SQE
] = ERDMA_QP_STATE_TERMINATE
,
1516 [IB_QPS_ERR
] = ERDMA_QP_STATE_ERROR
/*
 * Modify a QP. The only attribute handled in the visible code is the QP
 * state (IB_QP_STATE).
 *
 * @ibqp:      QP to modify
 * @attr:      requested attributes; qp_state is read when IB_QP_STATE is set
 * @attr_mask: IB_QP_* bits describing which fields of @attr are valid
 * @udata:     not referenced in the visible code
 */
1519 int erdma_modify_qp(struct ib_qp
*ibqp
, struct ib_qp_attr
*attr
, int attr_mask
,
1520 struct ib_udata
*udata
)
1522 struct erdma_qp_attrs new_attrs
;
1523 enum erdma_qp_attr_mask erdma_attr_mask
= 0;
1524 struct erdma_qp
*qp
= to_eqp(ibqp
);
/* Any bit outside the standard attribute set is checked for here. */
1527 if (attr_mask
& ~IB_QP_ATTR_STANDARD_BITS
)
1530 memset(&new_attrs
, 0, sizeof(new_attrs
));
/* Translate the verbs state into the driver's state via the lookup table. */
1532 if (attr_mask
& IB_QP_STATE
) {
1533 new_attrs
.state
= ib_qp_state_to_erdma_qp_state
[attr
->qp_state
];
1535 erdma_attr_mask
|= ERDMA_QP_ATTR_STATE
;
/* The internal modify runs with the QP's state_lock held for writing. */
1538 down_write(&qp
->state_lock
);
1540 ret
= erdma_modify_qp_internal(qp
, &new_attrs
, erdma_attr_mask
);
1542 up_write(&qp
->state_lock
);
/*
 * Convert the driver's qp->attrs.state (one of the ERDMA_QP_STATE_* cases
 * below) into the enum ib_qp_state value reported to callers.
 */
1547 static enum ib_qp_state
query_qp_state(struct erdma_qp
*qp
)
1549 switch (qp
->attrs
.state
) {
1550 case ERDMA_QP_STATE_IDLE
:
1552 case ERDMA_QP_STATE_RTR
:
1554 case ERDMA_QP_STATE_RTS
:
1556 case ERDMA_QP_STATE_CLOSING
:
1558 case ERDMA_QP_STATE_TERMINATE
:
1560 case ERDMA_QP_STATE_ERROR
:
/*
 * Fill @qp_attr and @qp_init_attr from the driver's view of the QP.
 *
 * @ibqp:         QP being queried
 * @qp_attr:      receives capabilities, path MTU, read-atomic limits,
 *                access flags and the current state
 * @qp_attr_mask: not referenced in the visible code
 * @qp_init_attr: receives a copy of the capabilities
 */
1567 int erdma_query_qp(struct ib_qp
*ibqp
, struct ib_qp_attr
*qp_attr
,
1568 int qp_attr_mask
, struct ib_qp_init_attr
*qp_init_attr
)
1570 struct erdma_dev
*dev
;
1571 struct erdma_qp
*qp
;
/* All three pointers are required before anything is dereferenced. */
1573 if (ibqp
&& qp_attr
&& qp_init_attr
) {
1575 dev
= to_edev(ibqp
->device
);
/* Inline data limit is the fixed ERDMA_MAX_INLINE for both structures. */
1580 qp_attr
->cap
.max_inline_data
= ERDMA_MAX_INLINE
;
1581 qp_init_attr
->cap
.max_inline_data
= ERDMA_MAX_INLINE
;
/* Queue depths and SGE limits come from the QP's stored attributes. */
1583 qp_attr
->cap
.max_send_wr
= qp
->attrs
.sq_size
;
1584 qp_attr
->cap
.max_recv_wr
= qp
->attrs
.rq_size
;
1585 qp_attr
->cap
.max_send_sge
= qp
->attrs
.max_send_sge
;
1586 qp_attr
->cap
.max_recv_sge
= qp
->attrs
.max_recv_sge
;
/* Path MTU is derived from the MTU of the associated net device. */
1588 qp_attr
->path_mtu
= ib_mtu_int_to_enum(dev
->netdev
->mtu
);
1589 qp_attr
->max_rd_atomic
= qp
->attrs
.irq_size
;
1590 qp_attr
->max_dest_rd_atomic
= qp
->attrs
.orq_size
;
/* Access flags are reported as a fixed set, not per-QP state. */
1592 qp_attr
->qp_access_flags
= IB_ACCESS_LOCAL_WRITE
|
1593 IB_ACCESS_REMOTE_WRITE
|
1594 IB_ACCESS_REMOTE_READ
;
/* Overwrites the whole cap structure, including max_inline_data above. */
1596 qp_init_attr
->cap
= qp_attr
->cap
;
/* Both state fields report the same translated driver state. */
1598 qp_attr
->qp_state
= query_qp_state(qp
);
1599 qp_attr
->cur_qp_state
= query_qp_state(qp
);
/*
 * Prepare the user-space backing of a CQ: the queue buffer described by
 * ureq->qbuf_va / ureq->qbuf_len and the doorbell record at
 * ureq->db_record_va.
 *
 * @ctx:  user context that owns the doorbell record mapping
 * @cq:   CQ whose user_cq fields are filled in
 * @ureq: create-CQ request supplied by user space
 */
1604 static int erdma_init_user_cq(struct erdma_ucontext
*ctx
, struct erdma_cq
*cq
,
1605 struct erdma_ureq_create_cq
*ureq
)
1608 struct erdma_dev
*dev
= to_edev(cq
->ibcq
.device
);
/* Queue buffer: results are stored in cq->user_cq.qbuf_mem. */
1610 ret
= get_mtt_entries(dev
, &cq
->user_cq
.qbuf_mem
, ureq
->qbuf_va
,
1611 ureq
->qbuf_len
, 0, ureq
->qbuf_va
, SZ_64M
- SZ_4K
,
/* Doorbell record: yields the page handle and its DMA address. */
1616 ret
= erdma_map_user_dbrecords(ctx
, ureq
->db_record_va
,
1617 &cq
->user_cq
.user_dbr_page
,
1618 &cq
->user_cq
.dbrec_dma
);
/* Undo of get_mtt_entries() above; presumably the failure path - confirm. */
1620 put_mtt_entries(dev
, &cq
->user_cq
.qbuf_mem
);
/*
 * Allocate the kernel-owned resources of a CQ: a coherent DMA queue buffer
 * of (cq->depth << CQE_SHIFT) bytes and a zeroed doorbell record from the
 * device's db_pool; then initialise the CQ lock and doorbell pointer.
 *
 * @cq: CQ whose kern_cq fields are filled in
 */
1625 static int erdma_init_kernel_cq(struct erdma_cq
*cq
)
1627 struct erdma_dev
*dev
= to_edev(cq
->ibcq
.device
);
/* Queue buffer; its bus address lands in kern_cq.qbuf_dma_addr. */
1630 dma_alloc_coherent(&dev
->pdev
->dev
, cq
->depth
<< CQE_SHIFT
,
1631 &cq
->kern_cq
.qbuf_dma_addr
, GFP_KERNEL
);
1632 if (!cq
->kern_cq
.qbuf
)
/* Doorbell record; its bus address lands in kern_cq.dbrec_dma. */
1635 cq
->kern_cq
.dbrec
= dma_pool_zalloc(dev
->db_pool
, GFP_KERNEL
,
1636 &cq
->kern_cq
.dbrec_dma
);
1637 if (!cq
->kern_cq
.dbrec
)
1640 spin_lock_init(&cq
->kern_cq
.lock
);
1641 /* use default cqdb addr */
1642 cq
->kern_cq
.db
= dev
->func_bar
+ ERDMA_BAR_CQDB_SPACE_OFFSET
;
/* Releases the queue buffer with the same size used to allocate it. */
1647 dma_free_coherent(&dev
->pdev
->dev
, cq
->depth
<< CQE_SHIFT
,
1648 cq
->kern_cq
.qbuf
, cq
->kern_cq
.qbuf_dma_addr
);
/*
 * Create a completion queue.
 *
 * Visible steps: bound the requested depth by dev->attrs.max_cqe, round it
 * up to a power of two, allocate a CQ number from dev->cq_xa, initialise
 * either the user or the kernel backing, and issue create_cq_cmd(). The
 * tail of the function releases those resources again.
 *
 * @ibcq:  core CQ embedded in the driver's erdma_cq
 * @attr:  requested number of CQEs (cqe) and completion vector
 * @attrs: uverbs bundle; its driver_udata carries the user request/response
 */
1653 int erdma_create_cq(struct ib_cq
*ibcq
, const struct ib_cq_init_attr
*attr
,
1654 struct uverbs_attr_bundle
*attrs
)
1656 struct ib_udata
*udata
= &attrs
->driver_udata
;
1657 struct erdma_cq
*cq
= to_ecq(ibcq
);
1658 struct erdma_dev
*dev
= to_edev(ibcq
->device
);
1659 unsigned int depth
= attr
->cqe
;
1661 struct erdma_ucontext
*ctx
= rdma_udata_to_drv_context(
1662 udata
, struct erdma_ucontext
, ibucontext
);
/* Requested depth is checked against the device maximum before rounding. */
1664 if (depth
> dev
->attrs
.max_cqe
)
1667 depth
= roundup_pow_of_two(depth
);
1668 cq
->ibcq
.cqe
= depth
;
/* Associated EQ number is the completion vector plus one. */
1670 cq
->assoc_eqn
= attr
->comp_vector
+ 1;
/* CQ numbers are handed out cyclically in the range [1, max_cq - 1]. */
1672 ret
= xa_alloc_cyclic(&dev
->cq_xa
, &cq
->cqn
, cq
,
1673 XA_LIMIT(1, dev
->attrs
.max_cq
- 1),
1674 &dev
->next_alloc_cqn
, GFP_KERNEL
);
/* User CQ: read the request, set up user memory, answer with id and depth. */
1678 if (!rdma_is_kernel_res(&ibcq
->res
)) {
1679 struct erdma_ureq_create_cq ureq
;
1680 struct erdma_uresp_create_cq uresp
;
/* Copy no more than the user actually supplied. */
1682 ret
= ib_copy_from_udata(&ureq
, udata
,
1683 min(udata
->inlen
, sizeof(ureq
)));
1687 ret
= erdma_init_user_cq(ctx
, cq
, &ureq
);
1691 uresp
.cq_id
= cq
->cqn
;
1692 uresp
.num_cqe
= depth
;
/* Copy no more than the user's output buffer can hold. */
1694 ret
= ib_copy_to_udata(udata
, &uresp
,
1695 min(sizeof(uresp
), udata
->outlen
));
/* Kernel CQ backing. */
1699 ret
= erdma_init_kernel_cq(cq
);
1704 ret
= create_cq_cmd(ctx
, cq
);
/* Release path: user mappings for a user CQ, DMA memory otherwise. */
1711 if (!rdma_is_kernel_res(&ibcq
->res
)) {
1712 erdma_unmap_user_dbrecords(ctx
, &cq
->user_cq
.user_dbr_page
);
1713 put_mtt_entries(dev
, &cq
->user_cq
.qbuf_mem
);
1715 dma_free_coherent(&dev
->pdev
->dev
, depth
<< CQE_SHIFT
,
1716 cq
->kern_cq
.qbuf
, cq
->kern_cq
.qbuf_dma_addr
);
1717 dma_pool_free(dev
->db_pool
, cq
->kern_cq
.dbrec
,
1718 cq
->kern_cq
.dbrec_dma
);
/* Give the CQ number back. */
1722 xa_erase(&dev
->cq_xa
, cq
->cqn
);
/* Disassociate callback; this driver performs no work in it. */
void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	/* Nothing to do. */
}
/*
 * Send a CMDQ_OPCODE_CONF_MTU command to the device.
 *
 * @dev: device whose command queue is used
 * @mtu: MTU value supplied by the caller
 *
 * The command's return value is not examined and no response words are
 * requested.
 */
1731 void erdma_set_mtu(struct erdma_dev
*dev
, u32 mtu
)
1733 struct erdma_cmdq_config_mtu_req req
;
1735 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_COMMON
,
1736 CMDQ_OPCODE_CONF_MTU
);
1739 erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
1742 void erdma_port_event(struct erdma_dev
*dev
, enum ib_event_type reason
)
1744 struct ib_event event
;
1746 event
.device
= &dev
->ibdev
;
1747 event
.element
.port_num
= 1;
1748 event
.event
= reason
;
1750 ib_dispatch_event(&event
);
/*
 * Hardware counter indices: six TX counters followed by five RX counters.
 * The same names are used as array designators in erdma_descs[].
 */
1754 ERDMA_STATS_TX_REQS_CNT
,
1755 ERDMA_STATS_TX_PACKETS_CNT
,
1756 ERDMA_STATS_TX_BYTES_CNT
,
1757 ERDMA_STATS_TX_DISABLE_DROP_CNT
,
1758 ERDMA_STATS_TX_BPS_METER_DROP_CNT
,
1759 ERDMA_STATS_TX_PPS_METER_DROP_CNT
,
1761 ERDMA_STATS_RX_PACKETS_CNT
,
1762 ERDMA_STATS_RX_BYTES_CNT
,
1763 ERDMA_STATS_RX_DISABLE_DROP_CNT
,
1764 ERDMA_STATS_RX_BPS_METER_DROP_CNT
,
1765 ERDMA_STATS_RX_PPS_METER_DROP_CNT
,
1770 static const struct rdma_stat_desc erdma_descs
[] = {
1771 [ERDMA_STATS_TX_REQS_CNT
].name
= "tx_reqs_cnt",
1772 [ERDMA_STATS_TX_PACKETS_CNT
].name
= "tx_packets_cnt",
1773 [ERDMA_STATS_TX_BYTES_CNT
].name
= "tx_bytes_cnt",
1774 [ERDMA_STATS_TX_DISABLE_DROP_CNT
].name
= "tx_disable_drop_cnt",
1775 [ERDMA_STATS_TX_BPS_METER_DROP_CNT
].name
= "tx_bps_limit_drop_cnt",
1776 [ERDMA_STATS_TX_PPS_METER_DROP_CNT
].name
= "tx_pps_limit_drop_cnt",
1777 [ERDMA_STATS_RX_PACKETS_CNT
].name
= "rx_packets_cnt",
1778 [ERDMA_STATS_RX_BYTES_CNT
].name
= "rx_bytes_cnt",
1779 [ERDMA_STATS_RX_DISABLE_DROP_CNT
].name
= "rx_disable_drop_cnt",
1780 [ERDMA_STATS_RX_BPS_METER_DROP_CNT
].name
= "rx_bps_limit_drop_cnt",
1781 [ERDMA_STATS_RX_PPS_METER_DROP_CNT
].name
= "rx_pps_limit_drop_cnt",
/*
 * Allocate the per-port hardware statistics structure, described by
 * erdma_descs[] with ERDMA_STATS_MAX counters and the default lifespan.
 *
 * @device: not referenced in the visible code
 *
 * Returns whatever rdma_alloc_hw_stats_struct() returns.
 */
1784 struct rdma_hw_stats
*erdma_alloc_hw_port_stats(struct ib_device
*device
,
1787 return rdma_alloc_hw_stats_struct(erdma_descs
, ERDMA_STATS_MAX
,
1788 RDMA_HW_STATS_DEFAULT_LIFESPAN
);
/*
 * Read the hardware counters into @stats.
 *
 * A response buffer is taken from dev->resp_pool, its DMA address is passed
 * to the device in a CMDQ_OPCODE_GET_STATS command, and the counters are
 * then copied out of the buffer.
 *
 * @dev:   device to query
 * @stats: destination; stats->num_counters u64 values are written starting
 *         at stats->value[0]
 */
1791 static int erdma_query_hw_stats(struct erdma_dev
*dev
,
1792 struct rdma_hw_stats
*stats
)
1794 struct erdma_cmdq_query_stats_resp
*resp
;
1795 struct erdma_cmdq_query_req req
;
1796 dma_addr_t dma_addr
;
1799 erdma_cmdq_build_reqhdr(&req
.hdr
, CMDQ_SUBMOD_COMMON
,
1800 CMDQ_OPCODE_GET_STATS
);
/* Zeroed response buffer that the device fills in by DMA. */
1802 resp
= dma_pool_zalloc(dev
->resp_pool
, GFP_KERNEL
, &dma_addr
);
1806 req
.target_addr
= dma_addr
;
1807 req
.target_length
= ERDMA_HW_RESP_SIZE
;
1809 err
= erdma_post_cmd_wait(&dev
->cmdq
, &req
, sizeof(req
), NULL
, NULL
);
/* The response header's magic is compared against the expected value. */
1813 if (resp
->hdr
.magic
!= ERDMA_HW_RESP_MAGIC
) {
/*
 * Counters are copied as one contiguous run of u64 values beginning at
 * resp->tx_req_cnt, in the order used by the stats->value[] indices.
 */
1818 memcpy(&stats
->value
[0], &resp
->tx_req_cnt
,
1819 sizeof(u64
) * stats
->num_counters
);
/* Return the response buffer to the pool. */
1822 dma_pool_free(dev
->resp_pool
, resp
, dma_addr
);
/*
 * Refresh @stats from the hardware via erdma_query_hw_stats() and report
 * stats->num_counters to the caller.
 *
 * @ibdev: device to query
 * @stats: statistics structure to fill
 * @port:  port number supplied by the caller
 * @index: not referenced in the visible code
 */
1827 int erdma_get_hw_stats(struct ib_device
*ibdev
, struct rdma_hw_stats
*stats
,
1828 u32 port
, int index
)
1830 struct erdma_dev
*dev
= to_edev(ibdev
);
1836 ret
= erdma_query_hw_stats(dev
, stats
);
1840 return stats
->num_counters
;