// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>
#define UVERBS_MODULE_NAME efa_ib
#include <rdma/uverbs_named_ioctl.h>
#include <rdma/ib_user_ioctl_cmds.h>

#include "efa.h"
#include "efa_io_defs.h"

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_NC,
	EFA_MMAP_IO_WC,
};

struct efa_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	u64 address;
	u8 mmap_flag;
};

#define EFA_DEFINE_DEVICE_STATS(op) \
	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
	op(EFA_COMPLETED_CMDS, "completed_cmds") \
	op(EFA_CMDS_ERR, "cmds_err") \
	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
	op(EFA_CREATE_QP_ERR, "create_qp_err") \
	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
	op(EFA_REG_MR_ERR, "reg_mr_err") \
	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
	op(EFA_CREATE_AH_ERR, "create_ah_err") \
	op(EFA_MMAP_ERR, "mmap_err")

#define EFA_DEFINE_PORT_STATS(op) \
	op(EFA_TX_BYTES, "tx_bytes") \
	op(EFA_TX_PKTS, "tx_pkts") \
	op(EFA_RX_BYTES, "rx_bytes") \
	op(EFA_RX_PKTS, "rx_pkts") \
	op(EFA_RX_DROPS, "rx_drops") \
	op(EFA_SEND_BYTES, "send_bytes") \
	op(EFA_SEND_WRS, "send_wrs") \
	op(EFA_RECV_BYTES, "recv_bytes") \
	op(EFA_RECV_WRS, "recv_wrs") \
	op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
	op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
	op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
	op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
	op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes")

#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, nam) \
	[ename].name = nam,

enum efa_hw_device_stats {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_device_stats_descs[] = {
	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
};

enum efa_hw_port_stats {
	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
};

static const struct rdma_stat_desc efa_port_stats_descs[] = {
	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
};

#define EFA_DEFAULT_LINK_SPEED_GBPS    100

#define EFA_CHUNK_PAYLOAD_SHIFT        12
#define EFA_CHUNK_PAYLOAD_SIZE         BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE     8

#define EFA_CHUNK_SHIFT                12
#define EFA_CHUNK_SIZE                 BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE             sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
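
/*
 * Worked example of the chunk arithmetic above, assuming a 12-byte
 * struct efa_com_ctrl_buff_info (an illustrative assumption, not taken from
 * this file): each 4KB chunk then holds EFA_PTRS_PER_CHUNK = (4096 - 12) / 8
 * = 510 page pointers, and EFA_CHUNK_USED_SIZE = 510 * 8 + 12 = 4092 bytes;
 * the remainder of the chunk is unused.
 */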

struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t dma_addr;
		} continuous;
		struct {
			u32 pbl_buf_size_in_pages;
			struct scatterlist *sgl;
			int sg_dma_cnt;
			struct pbl_chunk_list chunk_list;
		} indirect;
	} phys;
	u64 *pbl_buf;
	u32 pbl_buf_size_in_bytes;
	u8 physically_continuous;
};

static inline struct efa_dev *to_edev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct efa_dev, ibdev);
}

static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct efa_ucontext, ibucontext);
}

static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct efa_pd, ibpd);
}

static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct efa_mr, ibmr);
}

static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct efa_qp, ibqp);
}

static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct efa_cq, ibcq);
}

static inline struct efa_ah *to_eah(struct ib_ah *ibah)
{
	return container_of(ibah, struct efa_ah, ibah);
}

static inline struct efa_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *rdma_entry)
{
	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}

#define EFA_DEV_CAP(dev, cap) \
	((dev)->dev_attr.device_caps & \
	 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)

#define is_reserved_cleared(reserved) \
	!memchr_inv(reserved, 0, sizeof(reserved))
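
/*
 * efa_zalloc_mapped()/efa_free_mapped() below pair a zeroed, physically
 * contiguous allocation with a streaming DMA mapping; callers must hand the
 * same size and direction to both helpers.
 */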

static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
			       size_t size, enum dma_data_direction dir)
{
	void *addr;

	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!addr)
		return NULL;

	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
		free_pages_exact(addr, size);
		return NULL;
	}

	return addr;
}

static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
			    dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir)
{
	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
	free_pages_exact(cpu_addr, size);
}

int efa_query_device(struct ib_device *ibdev,
		     struct ib_device_attr *props,
		     struct ib_udata *udata)
{
	struct efa_com_get_device_attr_result *dev_attr;
	struct efa_ibv_ex_query_device_resp resp = {};
	struct efa_dev *dev = to_edev(ibdev);
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	dev_attr = &dev->dev_attr;

	memset(props, 0, sizeof(*props));
	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
	props->page_size_cap = dev_attr->page_size_cap;
	props->vendor_id = dev->pdev->vendor;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->pdev->subsystem_device;
	props->max_qp = dev_attr->max_qp;
	props->max_cq = dev_attr->max_cq;
	props->max_pd = dev_attr->max_pd;
	props->max_mr = dev_attr->max_mr;
	props->max_ah = dev_attr->max_ah;
	props->max_cqe = dev_attr->max_cq_depth;
	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
				 dev_attr->max_rq_depth);
	props->max_send_sge = dev_attr->max_sq_sge;
	props->max_recv_sge = dev_attr->max_rq_sge;
	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
	props->max_pkeys = 1;

	if (udata && udata->outlen) {
		resp.max_sq_sge = dev_attr->max_sq_sge;
		resp.max_rq_sge = dev_attr->max_rq_sge;
		resp.max_sq_wr = dev_attr->max_sq_depth;
		resp.max_rq_wr = dev_attr->max_rq_depth;
		resp.max_rdma_size = dev_attr->max_rdma_size;

		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
		if (EFA_DEV_CAP(dev, RDMA_READ))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;

		if (EFA_DEV_CAP(dev, RNR_RETRY))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;

		if (EFA_DEV_CAP(dev, DATA_POLLING_128))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128;

		if (EFA_DEV_CAP(dev, RDMA_WRITE))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;

		if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;

		if (dev->neqs)
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;

		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for query_device\n");
			return err;
		}
	}

	return 0;
}
static void efa_link_gbps_to_speed_and_width(u16 gbps,
					     enum ib_port_speed *speed,
					     enum ib_port_width *width)
{
	if (gbps >= 400) {
		*width = IB_WIDTH_8X;
		*speed = IB_SPEED_HDR;
	} else if (gbps >= 200) {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_HDR;
	} else if (gbps >= 120) {
		*width = IB_WIDTH_12X;
		*speed = IB_SPEED_FDR10;
	} else if (gbps >= 100) {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_EDR;
	} else if (gbps >= 60) {
		*width = IB_WIDTH_12X;
		*speed = IB_SPEED_DDR;
	} else if (gbps >= 50) {
		*width = IB_WIDTH_1X;
		*speed = IB_SPEED_HDR;
	} else if (gbps >= 40) {
		*width = IB_WIDTH_4X;
		*speed = IB_SPEED_FDR10;
	} else if (gbps >= 30) {
		*width = IB_WIDTH_12X;
		*speed = IB_SPEED_SDR;
	} else {
		*width = IB_WIDTH_1X;
		*speed = IB_SPEED_EDR;
	}
}

int efa_query_port(struct ib_device *ibdev, u32 port,
		   struct ib_port_attr *props)
{
	struct efa_dev *dev = to_edev(ibdev);
	enum ib_port_speed link_speed;
	enum ib_port_width link_width;
	u16 link_gbps;

	props->state = IB_PORT_ACTIVE;
	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
	efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
	props->active_speed = link_speed;
	props->active_width = link_width;
	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->max_msg_sz = dev->dev_attr.mtu;
	props->max_vl_num = 1;

	return 0;
}

int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		 int qp_attr_mask,
		 struct ib_qp_init_attr *qp_init_attr)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_query_qp_params params = {};
	struct efa_com_query_qp_result result;
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

#define EFA_QUERY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)

	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	params.qp_handle = qp->qp_handle;
	err = efa_com_query_qp(&dev->edev, &params, &result);
	if (err)
		return err;

	qp_attr->qp_state = result.qp_state;
	qp_attr->qkey = result.qkey;
	qp_attr->sq_psn = result.sq_psn;
	qp_attr->sq_draining = result.sq_draining;
	qp_attr->port_num = 1;
	qp_attr->rnr_retry = result.rnr_retry;

	qp_attr->cap.max_send_wr = qp->max_send_wr;
	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
	qp_attr->cap.max_send_sge = qp->max_send_sge;
	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->qp_context = ibqp->qp_context;
	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
		  union ib_gid *gid)
{
	struct efa_dev *dev = to_edev(ibdev);

	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));

	return 0;
}

int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
		   u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}

static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
{
	struct efa_com_dealloc_pd_params params = {
		.pdn = pdn,
	};

	return efa_com_dealloc_pd(&dev->edev, &params);
}

int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_alloc_pd_resp resp = {};
	struct efa_com_alloc_pd_result result;
	struct efa_pd *pd = to_epd(ibpd);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	err = efa_com_alloc_pd(&dev->edev, &result);
	if (err)
		goto err_out;

	pd->pdn = result.pdn;
	resp.pdn = result.pdn;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for alloc_pd\n");
			goto err_dealloc_pd;
		}
	}

	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);

	return 0;

err_dealloc_pd:
	efa_pd_dealloc(dev, result.pdn);
err_out:
	atomic64_inc(&dev->stats.alloc_pd_err);
	return err;
}

int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_pd *pd = to_epd(ibpd);

	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
	efa_pd_dealloc(dev, pd->pdn);
	return 0;
}

static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
{
	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };

	return efa_com_destroy_qp(&dev->edev, &params);
}

static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
{
	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
}

int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->pd->device);
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);

	err = efa_destroy_qp_handle(dev, qp->qp_handle);
	if (err)
		return err;

	efa_qp_user_mmap_entries_remove(qp);

	if (qp->rq_cpu_addr) {
		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size,
			  &qp->rq_dma_addr);
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
	}

	return 0;
}

static struct rdma_user_mmap_entry *
efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
			   u64 address, size_t length,
			   u8 mmap_flag, u64 *offset)
{
	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	int err;

	if (!entry)
		return NULL;

	entry->address = address;
	entry->mmap_flag = mmap_flag;

	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  length);
	if (err) {
		kfree(entry);
		return NULL;
	}
	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}
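
/*
 * qp_mmap_entries_setup() below exposes up to four regions to userspace:
 * the SQ and RQ doorbells (non-cached I/O), the LLQ descriptor window
 * (write-combined I/O) and the RQ ring buffer (kernel DMA pages).
 */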

static int qp_mmap_entries_setup(struct efa_qp *qp,
				 struct efa_dev *dev,
				 struct efa_ucontext *ucontext,
				 struct efa_com_create_qp_params *params,
				 struct efa_ibv_create_qp_resp *resp)
{
	size_t length;
	u64 address;

	address = dev->db_bar_addr + resp->sq_db_offset;
	qp->sq_db_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address,
					   PAGE_SIZE, EFA_MMAP_IO_NC,
					   &resp->sq_db_mmap_key);
	if (!qp->sq_db_mmap_entry)
		return -ENOMEM;

	resp->sq_db_offset &= ~PAGE_MASK;

	address = dev->mem_bar_addr + resp->llq_desc_offset;
	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
			    offset_in_page(resp->llq_desc_offset));

	qp->llq_desc_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address, length,
					   EFA_MMAP_IO_WC,
					   &resp->llq_desc_mmap_key);
	if (!qp->llq_desc_mmap_entry)
		goto err_remove_mmap;

	resp->llq_desc_offset &= ~PAGE_MASK;

	if (qp->rq_size) {
		address = dev->db_bar_addr + resp->rq_db_offset;

		qp->rq_db_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, PAGE_SIZE,
						   EFA_MMAP_IO_NC,
						   &resp->rq_db_mmap_key);
		if (!qp->rq_db_mmap_entry)
			goto err_remove_mmap;

		resp->rq_db_offset &= ~PAGE_MASK;

		address = virt_to_phys(qp->rq_cpu_addr);
		qp->rq_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, qp->rq_size,
						   EFA_MMAP_DMA_PAGE,
						   &resp->rq_mmap_key);
		if (!qp->rq_mmap_entry)
			goto err_remove_mmap;

		resp->rq_mmap_size = qp->rq_size;
	}

	return 0;

err_remove_mmap:
	efa_qp_user_mmap_entries_remove(qp);

	return -ENOMEM;
}

static int efa_qp_validate_cap(struct efa_dev *dev,
			       struct ib_qp_init_attr *init_attr)
{
	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested send wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_wr,
			  dev->dev_attr.max_sq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_wr,
			  dev->dev_attr.max_rq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge send[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested inline data[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_inline_data,
			  dev->dev_attr.inline_buf_size);
		return -EINVAL;
	}

	return 0;
}

*dev
,
650 struct ib_qp_init_attr
*init_attr
)
652 if (init_attr
->qp_type
!= IB_QPT_DRIVER
&&
653 init_attr
->qp_type
!= IB_QPT_UD
) {
654 ibdev_dbg(&dev
->ibdev
,
655 "Unsupported qp type %d\n", init_attr
->qp_type
);
659 if (init_attr
->srq
) {
660 ibdev_dbg(&dev
->ibdev
, "SRQ is not supported\n");
664 if (init_attr
->create_flags
) {
665 ibdev_dbg(&dev
->ibdev
, "Unsupported create flags\n");
672 int efa_create_qp(struct ib_qp
*ibqp
, struct ib_qp_init_attr
*init_attr
,
673 struct ib_udata
*udata
)
675 struct efa_com_create_qp_params create_qp_params
= {};
676 struct efa_com_create_qp_result create_qp_resp
;
677 struct efa_dev
*dev
= to_edev(ibqp
->device
);
678 struct efa_ibv_create_qp_resp resp
= {};
679 struct efa_ibv_create_qp cmd
= {};
680 struct efa_qp
*qp
= to_eqp(ibqp
);
681 struct efa_ucontext
*ucontext
;
682 u16 supported_efa_flags
= 0;
685 ucontext
= rdma_udata_to_drv_context(udata
, struct efa_ucontext
,
688 err
= efa_qp_validate_cap(dev
, init_attr
);
692 err
= efa_qp_validate_attr(dev
, init_attr
);
696 if (offsetofend(typeof(cmd
), driver_qp_type
) > udata
->inlen
) {
697 ibdev_dbg(&dev
->ibdev
,
698 "Incompatible ABI params, no input udata\n");
703 if (udata
->inlen
> sizeof(cmd
) &&
704 !ib_is_udata_cleared(udata
, sizeof(cmd
),
705 udata
->inlen
- sizeof(cmd
))) {
706 ibdev_dbg(&dev
->ibdev
,
707 "Incompatible ABI params, unknown fields in udata\n");
712 err
= ib_copy_from_udata(&cmd
, udata
,
713 min(sizeof(cmd
), udata
->inlen
));
715 ibdev_dbg(&dev
->ibdev
,
716 "Cannot copy udata for create_qp\n");
720 if (cmd
.comp_mask
|| !is_reserved_cleared(cmd
.reserved_98
)) {
721 ibdev_dbg(&dev
->ibdev
,
722 "Incompatible ABI params, unknown fields in udata\n");
727 if (EFA_DEV_CAP(dev
, UNSOLICITED_WRITE_RECV
))
728 supported_efa_flags
|= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV
;
730 if (cmd
.flags
& ~supported_efa_flags
) {
731 ibdev_dbg(&dev
->ibdev
, "Unsupported EFA QP create flags[%#x], supported[%#x]\n",
732 cmd
.flags
, supported_efa_flags
);
737 create_qp_params
.uarn
= ucontext
->uarn
;
738 create_qp_params
.pd
= to_epd(ibqp
->pd
)->pdn
;
740 if (init_attr
->qp_type
== IB_QPT_UD
) {
741 create_qp_params
.qp_type
= EFA_ADMIN_QP_TYPE_UD
;
742 } else if (cmd
.driver_qp_type
== EFA_QP_DRIVER_TYPE_SRD
) {
743 create_qp_params
.qp_type
= EFA_ADMIN_QP_TYPE_SRD
;
745 ibdev_dbg(&dev
->ibdev
,
746 "Unsupported qp type %d driver qp type %d\n",
747 init_attr
->qp_type
, cmd
.driver_qp_type
);
752 ibdev_dbg(&dev
->ibdev
, "Create QP: qp type %d driver qp type %#x\n",
753 init_attr
->qp_type
, cmd
.driver_qp_type
);
754 create_qp_params
.send_cq_idx
= to_ecq(init_attr
->send_cq
)->cq_idx
;
755 create_qp_params
.recv_cq_idx
= to_ecq(init_attr
->recv_cq
)->cq_idx
;
756 create_qp_params
.sq_depth
= init_attr
->cap
.max_send_wr
;
757 create_qp_params
.sq_ring_size_in_bytes
= cmd
.sq_ring_size
;
759 create_qp_params
.rq_depth
= init_attr
->cap
.max_recv_wr
;
760 create_qp_params
.rq_ring_size_in_bytes
= cmd
.rq_ring_size
;
761 qp
->rq_size
= PAGE_ALIGN(create_qp_params
.rq_ring_size_in_bytes
);
763 qp
->rq_cpu_addr
= efa_zalloc_mapped(dev
, &qp
->rq_dma_addr
,
764 qp
->rq_size
, DMA_TO_DEVICE
);
765 if (!qp
->rq_cpu_addr
) {
770 ibdev_dbg(&dev
->ibdev
,
771 "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
772 qp
->rq_cpu_addr
, qp
->rq_size
, &qp
->rq_dma_addr
);
773 create_qp_params
.rq_base_addr
= qp
->rq_dma_addr
;
776 create_qp_params
.sl
= cmd
.sl
;
778 if (cmd
.flags
& EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV
)
779 create_qp_params
.unsolicited_write_recv
= true;
781 err
= efa_com_create_qp(&dev
->edev
, &create_qp_params
,
784 goto err_free_mapped
;
786 resp
.sq_db_offset
= create_qp_resp
.sq_db_offset
;
787 resp
.rq_db_offset
= create_qp_resp
.rq_db_offset
;
788 resp
.llq_desc_offset
= create_qp_resp
.llq_descriptors_offset
;
789 resp
.send_sub_cq_idx
= create_qp_resp
.send_sub_cq_idx
;
790 resp
.recv_sub_cq_idx
= create_qp_resp
.recv_sub_cq_idx
;
792 err
= qp_mmap_entries_setup(qp
, dev
, ucontext
, &create_qp_params
,
797 qp
->qp_handle
= create_qp_resp
.qp_handle
;
798 qp
->ibqp
.qp_num
= create_qp_resp
.qp_num
;
799 qp
->max_send_wr
= init_attr
->cap
.max_send_wr
;
800 qp
->max_recv_wr
= init_attr
->cap
.max_recv_wr
;
801 qp
->max_send_sge
= init_attr
->cap
.max_send_sge
;
802 qp
->max_recv_sge
= init_attr
->cap
.max_recv_sge
;
803 qp
->max_inline_data
= init_attr
->cap
.max_inline_data
;
806 err
= ib_copy_to_udata(udata
, &resp
,
807 min(sizeof(resp
), udata
->outlen
));
809 ibdev_dbg(&dev
->ibdev
,
810 "Failed to copy udata for qp[%u]\n",
811 create_qp_resp
.qp_num
);
812 goto err_remove_mmap_entries
;
816 ibdev_dbg(&dev
->ibdev
, "Created qp[%d]\n", qp
->ibqp
.qp_num
);
820 err_remove_mmap_entries
:
821 efa_qp_user_mmap_entries_remove(qp
);
823 efa_destroy_qp_handle(dev
, create_qp_resp
.qp_handle
);
826 efa_free_mapped(dev
, qp
->rq_cpu_addr
, qp
->rq_dma_addr
,
827 qp
->rq_size
, DMA_TO_DEVICE
);
829 atomic64_inc(&dev
->stats
.create_qp_err
);
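
/*
 * srd_qp_state_table below is indexed as [current state][next state]; a
 * transition is allowed only when .valid is set, all .req_param bits are
 * supplied by the caller, and no bits outside req_param | opt_param |
 * IB_QP_STATE are set (see efa_modify_srd_qp_is_ok()).
 */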
static const struct {
	int			valid;
	enum ib_qp_attr_mask	req_param;
	enum ib_qp_attr_mask	opt_param;
} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.req_param = IB_QP_PKEY_INDEX |
				     IB_QP_PORT |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_INIT] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_PORT |
				     IB_QP_QKEY,
		},
		[IB_QPS_RTR]   = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_RTR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.req_param = IB_QP_SQ_PSN,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY |
				     IB_QP_RNR_RETRY,
		},
	},
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
		},
	},
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = IB_QP_PKEY_INDEX |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_SQE] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = IB_QP_CUR_STATE |
				     IB_QP_QKEY,
		},
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR]   = { .valid = 1 },
	},
};

,
917 enum ib_qp_state next_state
,
918 enum ib_qp_attr_mask mask
)
920 enum ib_qp_attr_mask req_param
, opt_param
;
922 if (mask
& IB_QP_CUR_STATE
&&
923 cur_state
!= IB_QPS_RTR
&& cur_state
!= IB_QPS_RTS
&&
924 cur_state
!= IB_QPS_SQD
&& cur_state
!= IB_QPS_SQE
)
927 if (!srd_qp_state_table
[cur_state
][next_state
].valid
)
930 req_param
= srd_qp_state_table
[cur_state
][next_state
].req_param
;
931 opt_param
= srd_qp_state_table
[cur_state
][next_state
].opt_param
;
933 if ((mask
& req_param
) != req_param
)
936 if (mask
& ~(req_param
| opt_param
| IB_QP_STATE
))
942 static int efa_modify_qp_validate(struct efa_dev
*dev
, struct efa_qp
*qp
,
943 struct ib_qp_attr
*qp_attr
, int qp_attr_mask
,
944 enum ib_qp_state cur_state
,
945 enum ib_qp_state new_state
)
949 #define EFA_MODIFY_QP_SUPP_MASK \
950 (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
951 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
954 if (qp_attr_mask
& ~EFA_MODIFY_QP_SUPP_MASK
) {
955 ibdev_dbg(&dev
->ibdev
,
956 "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
957 qp_attr_mask
, EFA_MODIFY_QP_SUPP_MASK
);
961 if (qp
->ibqp
.qp_type
== IB_QPT_DRIVER
)
962 err
= !efa_modify_srd_qp_is_ok(cur_state
, new_state
,
965 err
= !ib_modify_qp_is_ok(cur_state
, new_state
, IB_QPT_UD
,
969 ibdev_dbg(&dev
->ibdev
, "Invalid modify QP parameters\n");
973 if ((qp_attr_mask
& IB_QP_PORT
) && qp_attr
->port_num
!= 1) {
974 ibdev_dbg(&dev
->ibdev
, "Can't change port num\n");
978 if ((qp_attr_mask
& IB_QP_PKEY_INDEX
) && qp_attr
->pkey_index
) {
979 ibdev_dbg(&dev
->ibdev
, "Can't change pkey index\n");
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		  int qp_attr_mask, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_modify_qp_params params = {};
	struct efa_qp *qp = to_eqp(ibqp);
	enum ib_qp_state cur_state;
	enum ib_qp_state new_state;
	int err;

	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
						     qp->state;
	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;

	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
				     new_state);
	if (err)
		return err;

	params.qp_handle = qp->qp_handle;

	if (qp_attr_mask & IB_QP_STATE) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
			1);
		EFA_SET(&params.modify_mask,
			EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
		params.cur_qp_state = cur_state;
		params.qp_state = new_state;
	}

	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
		EFA_SET(&params.modify_mask,
			EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
	}

	if (qp_attr_mask & IB_QP_QKEY) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
		params.qkey = qp_attr->qkey;
	}

	if (qp_attr_mask & IB_QP_SQ_PSN) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
		params.sq_psn = qp_attr->sq_psn;
	}

	if (qp_attr_mask & IB_QP_RNR_RETRY) {
		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
			1);
		params.rnr_retry = qp_attr->rnr_retry;
	}

	err = efa_com_modify_qp(&dev->edev, &params);
	if (err)
		return err;

	qp->state = new_state;

	return 0;
}

static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
{
	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };

	return efa_com_destroy_cq(&dev->edev, &params);
}

static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
{
	rdma_user_mmap_entry_remove(cq->db_mmap_entry);
	rdma_user_mmap_entry_remove(cq->mmap_entry);
}

int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibcq->device);
	struct efa_cq *cq = to_ecq(ibcq);

	ibdev_dbg(&dev->ibdev,
		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);

	efa_destroy_cq_idx(dev, cq->cq_idx);
	efa_cq_user_mmap_entries_remove(cq);
	if (cq->eq) {
		xa_erase(&dev->cqs_xa, cq->cq_idx);
		synchronize_irq(cq->eq->irq.irqn);
	}
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);
	return 0;
}

static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
{
	return &dev->eqs[vec];
}

static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
				 struct efa_ibv_create_cq_resp *resp,
				 bool db_valid)
{
	resp->q_mmap_size = cq->size;
	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						    virt_to_phys(cq->cpu_addr),
						    cq->size, EFA_MMAP_DMA_PAGE,
						    &resp->q_mmap_key);
	if (!cq->mmap_entry)
		return -ENOMEM;

	if (db_valid) {
		cq->db_mmap_entry =
			efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						   dev->db_bar_addr + resp->db_off,
						   PAGE_SIZE, EFA_MMAP_IO_NC,
						   &resp->db_mmap_key);
		if (!cq->db_mmap_entry) {
			rdma_user_mmap_entry_remove(cq->mmap_entry);
			return -ENOMEM;
		}

		resp->db_off &= ~PAGE_MASK;
		resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
	}

	return 0;
}

*ibcq
, const struct ib_cq_init_attr
*attr
,
1126 struct uverbs_attr_bundle
*attrs
)
1128 struct ib_udata
*udata
= &attrs
->driver_udata
;
1129 struct efa_ucontext
*ucontext
= rdma_udata_to_drv_context(
1130 udata
, struct efa_ucontext
, ibucontext
);
1131 struct efa_com_create_cq_params params
= {};
1132 struct efa_ibv_create_cq_resp resp
= {};
1133 struct efa_com_create_cq_result result
;
1134 struct ib_device
*ibdev
= ibcq
->device
;
1135 struct efa_dev
*dev
= to_edev(ibdev
);
1136 struct efa_ibv_create_cq cmd
= {};
1137 struct efa_cq
*cq
= to_ecq(ibcq
);
1138 int entries
= attr
->cqe
;
1142 ibdev_dbg(ibdev
, "create_cq entries %d\n", entries
);
1147 if (entries
< 1 || entries
> dev
->dev_attr
.max_cq_depth
) {
1149 "cq: requested entries[%u] non-positive or greater than max[%u]\n",
1150 entries
, dev
->dev_attr
.max_cq_depth
);
1155 if (offsetofend(typeof(cmd
), num_sub_cqs
) > udata
->inlen
) {
1157 "Incompatible ABI params, no input udata\n");
1162 if (udata
->inlen
> sizeof(cmd
) &&
1163 !ib_is_udata_cleared(udata
, sizeof(cmd
),
1164 udata
->inlen
- sizeof(cmd
))) {
1166 "Incompatible ABI params, unknown fields in udata\n");
1171 err
= ib_copy_from_udata(&cmd
, udata
,
1172 min(sizeof(cmd
), udata
->inlen
));
1174 ibdev_dbg(ibdev
, "Cannot copy udata for create_cq\n");
1178 if (cmd
.comp_mask
|| !is_reserved_cleared(cmd
.reserved_58
)) {
1180 "Incompatible ABI params, unknown fields in udata\n");
1185 set_src_addr
= !!(cmd
.flags
& EFA_CREATE_CQ_WITH_SGID
);
1186 if ((cmd
.cq_entry_size
!= sizeof(struct efa_io_rx_cdesc_ex
)) &&
1188 cmd
.cq_entry_size
!= sizeof(struct efa_io_rx_cdesc
))) {
1190 "Invalid entry size [%u]\n", cmd
.cq_entry_size
);
1195 if (cmd
.num_sub_cqs
!= dev
->dev_attr
.sub_cqs_per_cq
) {
1197 "Invalid number of sub cqs[%u] expected[%u]\n",
1198 cmd
.num_sub_cqs
, dev
->dev_attr
.sub_cqs_per_cq
);
1203 cq
->ucontext
= ucontext
;
1204 cq
->size
= PAGE_ALIGN(cmd
.cq_entry_size
* entries
* cmd
.num_sub_cqs
);
1205 cq
->cpu_addr
= efa_zalloc_mapped(dev
, &cq
->dma_addr
, cq
->size
,
1207 if (!cq
->cpu_addr
) {
1212 params
.uarn
= cq
->ucontext
->uarn
;
1213 params
.sub_cq_depth
= entries
;
1214 params
.dma_addr
= cq
->dma_addr
;
1215 params
.entry_size_in_bytes
= cmd
.cq_entry_size
;
1216 params
.num_sub_cqs
= cmd
.num_sub_cqs
;
1217 params
.set_src_addr
= set_src_addr
;
1218 if (cmd
.flags
& EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL
) {
1219 cq
->eq
= efa_vec2eq(dev
, attr
->comp_vector
);
1220 params
.eqn
= cq
->eq
->eeq
.eqn
;
1221 params
.interrupt_mode_enabled
= true;
1224 err
= efa_com_create_cq(&dev
->edev
, ¶ms
, &result
);
1226 goto err_free_mapped
;
1228 resp
.db_off
= result
.db_off
;
1229 resp
.cq_idx
= result
.cq_idx
;
1230 cq
->cq_idx
= result
.cq_idx
;
1231 cq
->ibcq
.cqe
= result
.actual_depth
;
1232 WARN_ON_ONCE(entries
!= result
.actual_depth
);
1234 err
= cq_mmap_entries_setup(dev
, cq
, &resp
, result
.db_valid
);
1236 ibdev_dbg(ibdev
, "Could not setup cq[%u] mmap entries\n",
1238 goto err_destroy_cq
;
1242 err
= xa_err(xa_store(&dev
->cqs_xa
, cq
->cq_idx
, cq
, GFP_KERNEL
));
1244 ibdev_dbg(ibdev
, "Failed to store cq[%u] in xarray\n",
1246 goto err_remove_mmap
;
1250 if (udata
->outlen
) {
1251 err
= ib_copy_to_udata(udata
, &resp
,
1252 min(sizeof(resp
), udata
->outlen
));
1255 "Failed to copy udata for create_cq\n");
1260 ibdev_dbg(ibdev
, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1261 cq
->cq_idx
, result
.actual_depth
, &cq
->dma_addr
, cq
->cpu_addr
);
1267 xa_erase(&dev
->cqs_xa
, cq
->cq_idx
);
1269 efa_cq_user_mmap_entries_remove(cq
);
1271 efa_destroy_cq_idx(dev
, cq
->cq_idx
);
1273 efa_free_mapped(dev
, cq
->cpu_addr
, cq
->dma_addr
, cq
->size
,
1277 atomic64_inc(&dev
->stats
.create_cq_err
);
static int umem_to_page_list(struct efa_dev *dev,
			     struct ib_umem *umem,
			     u64 *page_list,
			     u32 hp_cnt,
			     u8 hp_shift)
{
	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct ib_block_iter biter;
	unsigned int hp_idx = 0;

	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
		  hp_cnt, pages_in_hp);

	rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);

	return 0;
}

static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
{
	struct scatterlist *sglist;
	struct page *pg;
	int i;

	sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	sg_init_table(sglist, page_cnt);
	for (i = 0; i < page_cnt; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE / sizeof(*buf);
	}
	return sglist;

err:
	kfree(sglist);
	return NULL;
}

/*
 * create a chunk list of physical pages dma addresses from the supplied
 * scatter gather list
 */
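/*
 * Layout sketch: each 4KB chunk is filled with up to EFA_PTRS_PER_CHUNK page
 * DMA addresses, followed by an efa_com_ctrl_buff_info holding the DMA
 * address and length of the next chunk, so the device can walk the list.
 */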
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
	unsigned int chunk_list_size, chunk_idx, payload_idx;
	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
	struct efa_com_ctrl_buff_info *ctrl_buf;
	u64 *cur_chunk_buf, *prev_chunk_buf;
	struct ib_block_iter biter;
	dma_addr_t dma_addr;
	int i;

	/* allocate a chunk list that consists of 4KB chunks */
	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);

	chunk_list->size = chunk_list_size;
	chunk_list->chunks = kcalloc(chunk_list_size,
				     sizeof(*chunk_list->chunks),
				     GFP_KERNEL);
	if (!chunk_list->chunks)
		return -ENOMEM;

	ibdev_dbg(&dev->ibdev,
		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
		  page_cnt);

	/* allocate chunk buffers: */
	for (i = 0; i < chunk_list_size; i++) {
		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
		if (!chunk_list->chunks[i].buf)
			goto chunk_list_dealloc;

		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
	}
	chunk_list->chunks[chunk_list_size - 1].length =
		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
			EFA_CHUNK_PTR_SIZE;

	/* fill the dma addresses of sg list pages to chunks: */
	chunk_idx = 0;
	payload_idx = 0;
	cur_chunk_buf = chunk_list->chunks[0].buf;
	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
			    EFA_CHUNK_PAYLOAD_SIZE) {
		cur_chunk_buf[payload_idx++] =
			rdma_block_iter_dma_address(&biter);

		if (payload_idx == EFA_PTRS_PER_CHUNK) {
			chunk_idx++;
			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
			payload_idx = 0;
		}
	}

	/* map chunks to dma and fill chunks next ptrs */
	for (i = chunk_list_size - 1; i >= 0; i--) {
		dma_addr = dma_map_single(&dev->pdev->dev,
					  chunk_list->chunks[i].buf,
					  chunk_list->chunks[i].length,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
			ibdev_err(&dev->ibdev,
				  "chunk[%u] dma_map_failed\n", i);
			goto chunk_list_unmap;
		}

		chunk_list->chunks[i].dma_addr = dma_addr;
		ibdev_dbg(&dev->ibdev,
			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);

		if (!i)
			break;

		prev_chunk_buf = chunk_list->chunks[i - 1].buf;

		ctrl_buf = (struct efa_com_ctrl_buff_info *)
				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
		ctrl_buf->length = chunk_list->chunks[i].length;

		efa_com_set_dma_addr(dma_addr,
				     &ctrl_buf->address.mem_addr_high,
				     &ctrl_buf->address.mem_addr_low);
	}

	return 0;

chunk_list_unmap:
	for (; i < chunk_list_size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
	}
chunk_list_dealloc:
	for (i = 0; i < chunk_list_size; i++)
		kfree(chunk_list->chunks[i].buf);

	kfree(chunk_list->chunks);
	return -ENOMEM;
}

static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int i;

	for (i = 0; i < chunk_list->size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
		kfree(chunk_list->chunks[i].buf);
	}

	kfree(chunk_list->chunks);
}

/* initialize pbl continuous mode: map pbl buffer to a dma address. */
static int pbl_continuous_initialize(struct efa_dev *dev,
				     struct pbl_context *pbl)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
		return -ENOMEM;
	}

	pbl->phys.continuous.dma_addr = dma_addr;
	ibdev_dbg(&dev->ibdev,
		  "pbl continuous - dma_addr = %pad, size[%u]\n",
		  &dma_addr, pbl->pbl_buf_size_in_bytes);

	return 0;
}
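
/*
 * pbl_create() further below picks between the two modes: a kmalloc-backed
 * (physically contiguous) pbl buffer is mapped directly in continuous mode,
 * while a vmalloc-backed buffer is described to the device through the
 * chunk list built above (indirect mode).
 */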

/*
 * initialize pbl indirect mode:
 * create a chunk list out of the dma addresses of the physical pages of
 * the pbl buffer.
 */
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
	struct scatterlist *sgl;
	int sg_dma_cnt, err;

	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
	if (!sgl)
		return -ENOMEM;

	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
	if (!sg_dma_cnt) {
		err = -EINVAL;
		goto err_map;
	}

	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
	pbl->phys.indirect.sgl = sgl;
	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
	err = pbl_chunk_list_create(dev, pbl);
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "chunk_list creation failed[%d]\n", err);
		goto err_chunk;
	}

	ibdev_dbg(&dev->ibdev,
		  "pbl indirect - size[%u], chunks[%u]\n",
		  pbl->pbl_buf_size_in_bytes,
		  pbl->phys.indirect.chunk_list.size);

	return 0;

err_chunk:
	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
err_map:
	kfree(sgl);
	return err;
}

static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
{
	pbl_chunk_list_destroy(dev, pbl);
	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
	kfree(pbl->phys.indirect.sgl);
}

/* create a page buffer list from a mapped user memory region */
static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
		      struct ib_umem *umem,
		      int hp_cnt,
		      u8 hp_shift)
{
	int err;

	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
	if (!pbl->pbl_buf)
		return -ENOMEM;

	if (is_vmalloc_addr(pbl->pbl_buf)) {
		pbl->physically_continuous = 0;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_indirect_initialize(dev, pbl);
		if (err)
			goto err_free;
	} else {
		pbl->physically_continuous = 1;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_continuous_initialize(dev, pbl);
		if (err)
			goto err_free;
	}

	ibdev_dbg(&dev->ibdev,
		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
		  hp_cnt, pbl->physically_continuous);

	return 0;

err_free:
	kvfree(pbl->pbl_buf);
	return err;
}

*dev
, struct pbl_context
*pbl
)
1566 if (pbl
->physically_continuous
)
1567 dma_unmap_single(&dev
->pdev
->dev
, pbl
->phys
.continuous
.dma_addr
,
1568 pbl
->pbl_buf_size_in_bytes
, DMA_TO_DEVICE
);
1570 pbl_indirect_terminate(dev
, pbl
);
1572 kvfree(pbl
->pbl_buf
);
static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
				 struct efa_com_reg_mr_params *params)
{
	int err;

	params->inline_pbl = 1;
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
	if (err)
		return err;

	ibdev_dbg(&dev->ibdev,
		  "inline_pbl_array - pages[%u]\n", params->page_num);

	return 0;
}

static int efa_create_pbl(struct efa_dev *dev,
			  struct pbl_context *pbl,
			  struct efa_mr *mr,
			  struct efa_com_reg_mr_params *params)
{
	int err;

	err = pbl_create(dev, pbl, mr->umem, params->page_num,
			 params->page_shift);
	if (err) {
		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
		return err;
	}

	params->inline_pbl = 0;
	params->indirect = !pbl->physically_continuous;
	if (pbl->physically_continuous) {
		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;

		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	} else {
		params->pbl.pbl.length =
			pbl->phys.indirect.chunk_list.chunks[0].length;

		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	}

	return 0;
}

static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
				   struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	int supp_access_flags;
	struct efa_mr *mr;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return ERR_PTR(-EINVAL);
	}

	supp_access_flags =
		IB_ACCESS_LOCAL_WRITE |
		(EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
		(EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);

	access_flags &= ~IB_ACCESS_OPTIONAL;
	if (access_flags & ~supp_access_flags) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported access flags[%#x], supported[%#x]\n",
			  access_flags, supp_access_flags);
		return ERR_PTR(-EOPNOTSUPP);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	return mr;
}

static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
			   u64 length, u64 virt_addr, int access_flags)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_com_reg_mr_params params = {};
	struct efa_com_reg_mr_result result = {};
	struct pbl_context pbl;
	unsigned int pg_sz;
	int inline_size;
	int err;

	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags;

	pg_sz = ib_umem_find_best_pgsz(mr->umem,
				       dev->dev_attr.page_size_cap,
				       virt_addr);
	if (!pg_sz) {
		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
			  dev->dev_attr.page_size_cap);
		return -EOPNOTSUPP;
	}

	params.page_shift = order_base_2(pg_sz);
	params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);

	ibdev_dbg(&dev->ibdev,
		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
		  start, length, params.page_shift, params.page_num);

	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
			return err;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
			return err;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
			return err;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);

		if (err)
			return err;
	}

	mr->ibmr.lkey = result.l_key;
	mr->ibmr.rkey = result.r_key;
	mr->ibmr.length = length;
	mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id;
	mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id;
	mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id;
	mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid;
	mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid;
	mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid;
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);

	return 0;
}

struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
				     u64 length, u64 virt_addr,
				     int fd, int access_flags,
				     struct uverbs_attr_bundle *attrs)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct ib_umem_dmabuf *umem_dmabuf;
	struct efa_mr *mr;
	int err;

	mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto err_out;
	}

	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
						access_flags);
	if (IS_ERR(umem_dmabuf)) {
		err = PTR_ERR(umem_dmabuf);
		ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
		goto err_free;
	}

	mr->umem = &umem_dmabuf->umem;
	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
	if (err)
		goto err_release;

	return &mr->ibmr;

err_release:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.reg_mr_err);
	return ERR_PTR(err);
}

struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
			 u64 virt_addr, int access_flags,
			 struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_mr *mr;
	int err;

	mr = efa_alloc_mr(ibpd, access_flags, udata);
	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto err_out;
	}

	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		ibdev_dbg(&dev->ibdev,
			  "Failed to pin and map user space memory[%d]\n", err);
		goto err_free;
	}

	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
	if (err)
		goto err_release;

	return &mr->ibmr;

err_release:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.reg_mr_err);
	return ERR_PTR(err);
}

static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs)
{
	struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE);
	struct efa_mr *mr = to_emr(ibmr);
	u16 ic_id_validity = 0;
	int ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
			     &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id));
	if (ret)
		return ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
			     &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id));
	if (ret)
		return ret;

	ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
			     &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id));
	if (ret)
		return ret;

	if (mr->ic_info.recv_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID;
	if (mr->ic_info.rdma_read_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID;
	if (mr->ic_info.rdma_recv_ic_id_valid)
		ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID;

	return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
			      &ic_id_validity, sizeof(ic_id_validity));
}

int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibmr->device);
	struct efa_com_dereg_mr_params params;
	struct efa_mr *mr = to_emr(ibmr);
	int err;

	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);

	params.l_key = mr->ibmr.lkey;
	err = efa_com_dereg_mr(&dev->edev, &params);
	if (err)
		return err;

	ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

*ibdev
, u32 port_num
,
1858 struct ib_port_immutable
*immutable
)
1860 struct ib_port_attr attr
;
1863 err
= ib_query_port(ibdev
, port_num
, &attr
);
1865 ibdev_dbg(ibdev
, "Couldn't query port err[%d]\n", err
);
1869 immutable
->pkey_tbl_len
= attr
.pkey_tbl_len
;
1870 immutable
->gid_tbl_len
= attr
.gid_tbl_len
;
1875 static int efa_dealloc_uar(struct efa_dev
*dev
, u16 uarn
)
1877 struct efa_com_dealloc_uar_params params
= {
1881 return efa_com_dealloc_uar(&dev
->edev
, ¶ms
);
#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
	(_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
		     NULL : #_attr)

static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
				   const struct efa_ibv_alloc_ucontext_cmd *cmd)
{
	struct efa_dev *dev = to_edev(ibucontext->device);
	char *attr_str;

	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
				EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
		goto err;

	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
				EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
				attr_str))
		goto err;

	return 0;

err:
	ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
		  attr_str);
	return -EOPNOTSUPP;
}

*ibucontext
, struct ib_udata
*udata
)
1913 struct efa_ucontext
*ucontext
= to_eucontext(ibucontext
);
1914 struct efa_dev
*dev
= to_edev(ibucontext
->device
);
1915 struct efa_ibv_alloc_ucontext_resp resp
= {};
1916 struct efa_ibv_alloc_ucontext_cmd cmd
= {};
1917 struct efa_com_alloc_uar_result result
;
1921 * it's fine if the driver does not know all request fields,
1922 * we will ack input fields in our response.
1925 err
= ib_copy_from_udata(&cmd
, udata
,
1926 min(sizeof(cmd
), udata
->inlen
));
1928 ibdev_dbg(&dev
->ibdev
,
1929 "Cannot copy udata for alloc_ucontext\n");
1933 err
= efa_user_comp_handshake(ibucontext
, &cmd
);
1937 err
= efa_com_alloc_uar(&dev
->edev
, &result
);
1941 ucontext
->uarn
= result
.uarn
;
1943 resp
.cmds_supp_udata_mask
|= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE
;
1944 resp
.cmds_supp_udata_mask
|= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH
;
1945 resp
.sub_cqs_per_cq
= dev
->dev_attr
.sub_cqs_per_cq
;
1946 resp
.inline_buf_size
= dev
->dev_attr
.inline_buf_size
;
1947 resp
.max_llq_size
= dev
->dev_attr
.max_llq_size
;
1948 resp
.max_tx_batch
= dev
->dev_attr
.max_tx_batch
;
1949 resp
.min_sq_wr
= dev
->dev_attr
.min_sq_depth
;
1951 err
= ib_copy_to_udata(udata
, &resp
,
1952 min(sizeof(resp
), udata
->outlen
));
1954 goto err_dealloc_uar
;
1959 efa_dealloc_uar(dev
, result
.uarn
);
1961 atomic64_inc(&dev
->stats
.alloc_ucontext_err
);
void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);

	efa_dealloc_uar(dev, ucontext->uarn);
}

void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}
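
/*
 * __efa_mmap() below resolves the user's mmap offset to one of three kinds
 * of entries: non-cached I/O (doorbells), write-combined I/O (LLQ) or kernel
 * DMA pages (queue buffers), and maps the range accordingly.
 */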

static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		atomic64_inc(&dev->stats.mmap_err);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err) {
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);
		atomic64_inc(&dev->stats.mmap_err);
	}

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_init_attr *init_attr,
		  struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.create_ah_err);
	return err;
}

int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return -EOPNOTSUPP;
	}

	efa_ah_destroy(dev, ah);
	return 0;
}

struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
					      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
					  ARRAY_SIZE(efa_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
{
	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
					  ARRAY_SIZE(efa_device_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
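
/*
 * Device-level stats below combine admin-queue counters kept by efa_com with
 * software error counters maintained by this driver.
 */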

static int efa_fill_device_stats(struct efa_dev *dev,
				 struct rdma_hw_stats *stats)
{
	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
	struct efa_stats *s = &dev->stats;

	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
		atomic64_read(&s->alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);

	return ARRAY_SIZE(efa_device_stats_descs);
}
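
/*
 * Port stats are fetched from the device with EFA_ADMIN_GET_STATS, one query
 * per statistics type; RDMA write counters are only queried when the device
 * reports the RDMA_WRITE capability.
 */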

static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
			       u32 port_num)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_com_rdma_write_stats *rws;
	struct efa_com_rdma_read_stats *rrs;
	struct efa_com_messages_stats *ms;
	struct efa_com_basic_stats *bs;
	int err;

	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	ms = &result.messages_stats;
	stats->value[EFA_SEND_BYTES] = ms->send_bytes;
	stats->value[EFA_SEND_WRS] = ms->send_wrs;
	stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
	stats->value[EFA_RECV_WRS] = ms->recv_wrs;

	params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	rrs = &result.rdma_read_stats;
	stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
	stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;

	if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
		params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
		err = efa_com_get_stats(&dev->edev, &params, &result);
		if (err)
			return err;

		rws = &result.rdma_write_stats;
		stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
		stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
		stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
		stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
	}

	return ARRAY_SIZE(efa_port_stats_descs);
}

int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u32 port_num, int index)
{
	if (port_num)
		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
	else
		return efa_fill_device_stats(to_edev(ibdev), stats);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u32 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}

DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
			    UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
					    UVERBS_OBJECT_MR,
					    UVERBS_ACCESS_READ,
					    UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY),
			    UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
						UVERBS_ATTR_TYPE(u16),
						UA_MANDATORY));

ADD_UVERBS_METHODS(efa_mr,
		   UVERBS_OBJECT_MR,
		   &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));

const struct uapi_definition efa_uapi_defs[] = {
	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,