// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
 */

#include <linux/vmalloc.h>

#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "efa.h"

enum {
	EFA_MMAP_DMA_PAGE = 0,
	EFA_MMAP_IO_WC,
	EFA_MMAP_IO_NC,
};

#define EFA_AENQ_ENABLED_GROUPS \
	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))

struct efa_user_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	u64 address;
	u8 mmap_flag;
};

#define EFA_DEFINE_STATS(op) \
	op(EFA_TX_BYTES, "tx_bytes") \
	op(EFA_TX_PKTS, "tx_pkts") \
	op(EFA_RX_BYTES, "rx_bytes") \
	op(EFA_RX_PKTS, "rx_pkts") \
	op(EFA_RX_DROPS, "rx_drops") \
	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
	op(EFA_COMPLETED_CMDS, "completed_cmds") \
	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
	op(EFA_CREATE_QP_ERR, "create_qp_err") \
	op(EFA_REG_MR_ERR, "reg_mr_err") \
	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
	op(EFA_CREATE_AH_ERR, "create_ah_err")

#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, name) [ename] = name,

enum efa_hw_stats {
	EFA_DEFINE_STATS(EFA_STATS_ENUM)
};

static const char *const efa_stats_names[] = {
	EFA_DEFINE_STATS(EFA_STATS_STR)
};

#define EFA_CHUNK_PAYLOAD_SHIFT       12
#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8

#define EFA_CHUNK_SHIFT               12
#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)

#define EFA_PTRS_PER_CHUNK \
	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)

#define EFA_CHUNK_USED_SIZE \
	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)

struct pbl_chunk {
	dma_addr_t dma_addr;
	u64 *buf;
	u32 length;
};

struct pbl_chunk_list {
	struct pbl_chunk *chunks;
	unsigned int size;
};

struct pbl_context {
	union {
		struct {
			dma_addr_t dma_addr;
		} continuous;
		struct {
			u32 pbl_buf_size_in_pages;
			struct scatterlist *sgl;
			int sg_dma_cnt;
			struct pbl_chunk_list chunk_list;
		} indirect;
	} phys;
	u64 *pbl_buf;
	u32 pbl_buf_size_in_bytes;
	u8 physically_continuous;
};

static inline struct efa_dev *to_edev(struct ib_device *ibdev)
{
	return container_of(ibdev, struct efa_dev, ibdev);
}

static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
{
	return container_of(ibucontext, struct efa_ucontext, ibucontext);
}

static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct efa_pd, ibpd);
}

static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
{
	return container_of(ibmr, struct efa_mr, ibmr);
}

static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
{
	return container_of(ibqp, struct efa_qp, ibqp);
}

static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
{
	return container_of(ibcq, struct efa_cq, ibcq);
}

static inline struct efa_ah *to_eah(struct ib_ah *ibah)
{
	return container_of(ibah, struct efa_ah, ibah);
}

static inline struct efa_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *rdma_entry)
{
	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
}

static inline bool is_rdma_read_cap(struct efa_dev *dev)
{
	return dev->dev_attr.device_caps &
	       EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK;
}

#define is_reserved_cleared(reserved) \
	!memchr_inv(reserved, 0, sizeof(reserved))

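/*
 * Allocate a zeroed, physically contiguous buffer and DMA-map it towards the
 * device. alloc_pages_exact() (rather than vmalloc) is used because the
 * buffer may later be translated with virt_to_phys() and exposed to
 * userspace page by page via vm_insert_page().
 */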
static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
			       size_t size, enum dma_data_direction dir)
{
	void *addr;

	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!addr)
		return NULL;

	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
		free_pages_exact(addr, size);
		return NULL;
	}

	return addr;
}

static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
			    dma_addr_t dma_addr,
			    size_t size, enum dma_data_direction dir)
{
	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
	free_pages_exact(cpu_addr, size);
}

int efa_query_device(struct ib_device *ibdev,
		     struct ib_device_attr *props,
		     struct ib_udata *udata)
{
	struct efa_com_get_device_attr_result *dev_attr;
	struct efa_ibv_ex_query_device_resp resp = {};
	struct efa_dev *dev = to_edev(ibdev);
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	dev_attr = &dev->dev_attr;

	memset(props, 0, sizeof(*props));
	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
	props->page_size_cap = dev_attr->page_size_cap;
	props->vendor_id = dev->pdev->vendor;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->pdev->subsystem_device;
	props->max_qp = dev_attr->max_qp;
	props->max_cq = dev_attr->max_cq;
	props->max_pd = dev_attr->max_pd;
	props->max_mr = dev_attr->max_mr;
	props->max_ah = dev_attr->max_ah;
	props->max_cqe = dev_attr->max_cq_depth;
	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
				 dev_attr->max_rq_depth);
	props->max_send_sge = dev_attr->max_sq_sge;
	props->max_recv_sge = dev_attr->max_rq_sge;
	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
	props->max_pkeys = 1;

	if (udata && udata->outlen) {
		resp.max_sq_sge = dev_attr->max_sq_sge;
		resp.max_rq_sge = dev_attr->max_rq_sge;
		resp.max_sq_wr = dev_attr->max_sq_depth;
		resp.max_rq_wr = dev_attr->max_rq_depth;
		resp.max_rdma_size = dev_attr->max_rdma_size;

		if (is_rdma_read_cap(dev))
			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;

		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for query_device\n");
			return err;
		}
	}

	return 0;
}

int efa_query_port(struct ib_device *ibdev, u8 port,
		   struct ib_port_attr *props)
{
	struct efa_dev *dev = to_edev(ibdev);

	props->state = IB_PORT_ACTIVE;
	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	props->gid_tbl_len = 1;
	props->pkey_tbl_len = 1;
	props->active_speed = IB_SPEED_EDR;
	props->active_width = IB_WIDTH_4X;
	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
	props->max_msg_sz = dev->dev_attr.mtu;
	props->max_vl_num = 1;

	return 0;
}

int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		 int qp_attr_mask,
		 struct ib_qp_init_attr *qp_init_attr)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_query_qp_params params = {};
	struct efa_com_query_qp_result result;
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

#define EFA_QUERY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)

	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	params.qp_handle = qp->qp_handle;
	err = efa_com_query_qp(&dev->edev, &params, &result);
	if (err)
		return err;

	qp_attr->qp_state = result.qp_state;
	qp_attr->qkey = result.qkey;
	qp_attr->sq_psn = result.sq_psn;
	qp_attr->sq_draining = result.sq_draining;
	qp_attr->port_num = 1;

	qp_attr->cap.max_send_wr = qp->max_send_wr;
	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
	qp_attr->cap.max_send_sge = qp->max_send_sge;
	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
	qp_attr->cap.max_inline_data = qp->max_inline_data;

	qp_init_attr->qp_type = ibqp->qp_type;
	qp_init_attr->recv_cq = ibqp->recv_cq;
	qp_init_attr->send_cq = ibqp->send_cq;
	qp_init_attr->qp_context = ibqp->qp_context;
	qp_init_attr->cap = qp_attr->cap;

	return 0;
}

int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
		  union ib_gid *gid)
{
	struct efa_dev *dev = to_edev(ibdev);

	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));

	return 0;
}

int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
		   u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}

static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
{
	struct efa_com_dealloc_pd_params params = {
		.pdn = pdn,
	};

	return efa_com_dealloc_pd(&dev->edev, &params);
}

int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_alloc_pd_resp resp = {};
	struct efa_com_alloc_pd_result result;
	struct efa_pd *pd = to_epd(ibpd);
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	err = efa_com_alloc_pd(&dev->edev, &result);
	if (err)
		goto err_out;

	pd->pdn = result.pdn;
	resp.pdn = result.pdn;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for alloc_pd\n");
			goto err_dealloc_pd;
		}
	}

	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);

	return 0;

err_dealloc_pd:
	efa_pd_dealloc(dev, result.pdn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
	return err;
}

void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_pd *pd = to_epd(ibpd);

	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
	efa_pd_dealloc(dev, pd->pdn);
}

static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
{
	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };

	return efa_com_destroy_qp(&dev->edev, &params);
}

static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
{
	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
}

int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->pd->device);
	struct efa_qp *qp = to_eqp(ibqp);
	int err;

	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);

	efa_qp_user_mmap_entries_remove(qp);

	err = efa_destroy_qp_handle(dev, qp->qp_handle);
	if (err)
		return err;

	if (qp->rq_cpu_addr) {
		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size,
			  &qp->rq_dma_addr);
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
	}

	kfree(qp);
	return 0;
}

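/*
 * Wrap rdma_user_mmap_entry_insert(): allocate an efa_user_mmap_entry that
 * records the address and mapping type, register it with the rdma core mmap
 * machinery and return, through *offset, the cookie userspace passes to
 * mmap().
 */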
static struct rdma_user_mmap_entry *
efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
			   u64 address, size_t length,
			   u8 mmap_flag, u64 *offset)
{
	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	int err;

	if (!entry)
		return NULL;

	entry->address = address;
	entry->mmap_flag = mmap_flag;

	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  length);
	if (err) {
		kfree(entry);
		return NULL;
	}
	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);

	return &entry->rdma_entry;
}

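/*
 * Expose the QP resources to userspace: a non-cached mapping for the SQ
 * doorbell, a write-combined mapping for the LLQ descriptor ring in the
 * device memory BAR and, when a receive queue exists, mappings for the RQ
 * doorbell and the DMA-able RQ buffer itself.
 */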
static int qp_mmap_entries_setup(struct efa_qp *qp,
				 struct efa_dev *dev,
				 struct efa_ucontext *ucontext,
				 struct efa_com_create_qp_params *params,
				 struct efa_ibv_create_qp_resp *resp)
{
	size_t length;
	u64 address;

	address = dev->db_bar_addr + resp->sq_db_offset;
	qp->sq_db_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address,
					   PAGE_SIZE, EFA_MMAP_IO_NC,
					   &resp->sq_db_mmap_key);
	if (!qp->sq_db_mmap_entry)
		return -ENOMEM;

	resp->sq_db_offset &= ~PAGE_MASK;

	address = dev->mem_bar_addr + resp->llq_desc_offset;
	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
			    (resp->llq_desc_offset & ~PAGE_MASK));

	qp->llq_desc_mmap_entry =
		efa_user_mmap_entry_insert(&ucontext->ibucontext,
					   address, length,
					   EFA_MMAP_IO_WC,
					   &resp->llq_desc_mmap_key);
	if (!qp->llq_desc_mmap_entry)
		goto err_remove_mmap;

	resp->llq_desc_offset &= ~PAGE_MASK;

	if (qp->rq_size) {
		address = dev->db_bar_addr + resp->rq_db_offset;

		qp->rq_db_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, PAGE_SIZE,
						   EFA_MMAP_IO_NC,
						   &resp->rq_db_mmap_key);
		if (!qp->rq_db_mmap_entry)
			goto err_remove_mmap;

		resp->rq_db_offset &= ~PAGE_MASK;

		address = virt_to_phys(qp->rq_cpu_addr);
		qp->rq_mmap_entry =
			efa_user_mmap_entry_insert(&ucontext->ibucontext,
						   address, qp->rq_size,
						   EFA_MMAP_DMA_PAGE,
						   &resp->rq_mmap_key);
		if (!qp->rq_mmap_entry)
			goto err_remove_mmap;

		resp->rq_mmap_size = qp->rq_size;
	}

	return 0;

err_remove_mmap:
	efa_qp_user_mmap_entries_remove(qp);

	return -ENOMEM;
}

static int efa_qp_validate_cap(struct efa_dev *dev,
			       struct ib_qp_init_attr *init_attr)
{
	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested send wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_wr,
			  dev->dev_attr.max_sq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_wr,
			  dev->dev_attr.max_rq_depth);
		return -EINVAL;
	}
	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge send[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
		return -EINVAL;
	}
	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
		ibdev_dbg(&dev->ibdev,
			  "qp: requested inline data[%u] exceeds the max[%u]\n",
			  init_attr->cap.max_inline_data,
			  dev->dev_attr.inline_buf_size);
		return -EINVAL;
	}

	return 0;
}

static int efa_qp_validate_attr(struct efa_dev *dev,
				struct ib_qp_init_attr *init_attr)
{
	if (init_attr->qp_type != IB_QPT_DRIVER &&
	    init_attr->qp_type != IB_QPT_UD) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d\n", init_attr->qp_type);
		return -EOPNOTSUPP;
	}

	if (init_attr->srq) {
		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
		return -EOPNOTSUPP;
	}

	if (init_attr->create_flags) {
		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
			    struct ib_qp_init_attr *init_attr,
			    struct ib_udata *udata)
{
	struct efa_com_create_qp_params create_qp_params = {};
	struct efa_com_create_qp_result create_qp_resp;
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_ibv_create_qp_resp resp = {};
	struct efa_ibv_create_qp cmd = {};
	struct efa_ucontext *ucontext;
	struct efa_qp *qp;
	int err;

	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
					     ibucontext);

	err = efa_qp_validate_cap(dev, init_attr);
	if (err)
		goto err_out;

	err = efa_qp_validate_attr(dev, init_attr);
	if (err)
		goto err_out;

	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "Cannot copy udata for create_qp\n");
		goto err_out;
	}

	if (cmd.comp_mask) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		err = -ENOMEM;
		goto err_out;
	}

	create_qp_params.uarn = ucontext->uarn;
	create_qp_params.pd = to_epd(ibpd)->pdn;

	if (init_attr->qp_type == IB_QPT_UD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
	} else {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp type %d driver qp type %d\n",
			  init_attr->qp_type, cmd.driver_qp_type);
		err = -EOPNOTSUPP;
		goto err_free_qp;
	}

	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
		  init_attr->qp_type, cmd.driver_qp_type);
	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;

	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
	if (qp->rq_size) {
		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
						    qp->rq_size, DMA_TO_DEVICE);
		if (!qp->rq_cpu_addr) {
			err = -ENOMEM;
			goto err_free_qp;
		}

		ibdev_dbg(&dev->ibdev,
			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
		create_qp_params.rq_base_addr = qp->rq_dma_addr;
	}

	err = efa_com_create_qp(&dev->edev, &create_qp_params,
				&create_qp_resp);
	if (err)
		goto err_free_mapped;

	resp.sq_db_offset = create_qp_resp.sq_db_offset;
	resp.rq_db_offset = create_qp_resp.rq_db_offset;
	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;

	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
				    &resp);
	if (err)
		goto err_destroy_qp;

	qp->qp_handle = create_qp_resp.qp_handle;
	qp->ibqp.qp_num = create_qp_resp.qp_num;
	qp->ibqp.qp_type = init_attr->qp_type;
	qp->max_send_wr = init_attr->cap.max_send_wr;
	qp->max_recv_wr = init_attr->cap.max_recv_wr;
	qp->max_send_sge = init_attr->cap.max_send_sge;
	qp->max_recv_sge = init_attr->cap.max_recv_sge;
	qp->max_inline_data = init_attr->cap.max_inline_data;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for qp[%u]\n",
				  create_qp_resp.qp_num);
			goto err_remove_mmap_entries;
		}
	}

	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);

	return &qp->ibqp;

err_remove_mmap_entries:
	efa_qp_user_mmap_entries_remove(qp);
err_destroy_qp:
	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
	if (qp->rq_size)
		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
				qp->rq_size, DMA_TO_DEVICE);
err_free_qp:
	kfree(qp);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_qp_err);
	return ERR_PTR(err);
}

static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
				  enum ib_qp_state cur_state,
				  enum ib_qp_state new_state)
{
#define EFA_MODIFY_QP_SUPP_MASK \
	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)

	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
		return -EOPNOTSUPP;
	}

	if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
				qp_attr_mask)) {
		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
		return -EINVAL;
	}

	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
		return -EOPNOTSUPP;
	}

	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
		return -EOPNOTSUPP;
	}

	return 0;
}

int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
		  int qp_attr_mask, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibqp->device);
	struct efa_com_modify_qp_params params = {};
	struct efa_qp *qp = to_eqp(ibqp);
	enum ib_qp_state cur_state;
	enum ib_qp_state new_state;
	int err;

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		return -EINVAL;
	}

	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
						     qp->state;
	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;

	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
				     new_state);
	if (err)
		return err;

	params.qp_handle = qp->qp_handle;

	if (qp_attr_mask & IB_QP_STATE) {
		params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
				      BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
		params.cur_qp_state = qp_attr->cur_qp_state;
		params.qp_state = qp_attr->qp_state;
	}

	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
		params.modify_mask |=
			BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
	}

	if (qp_attr_mask & IB_QP_QKEY) {
		params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
		params.qkey = qp_attr->qkey;
	}

	if (qp_attr_mask & IB_QP_SQ_PSN) {
		params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
		params.sq_psn = qp_attr->sq_psn;
	}

	err = efa_com_modify_qp(&dev->edev, &params);
	if (err)
		return err;

	qp->state = new_state;

	return 0;
}

static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
{
	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };

	return efa_com_destroy_cq(&dev->edev, &params);
}

void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibcq->device);
	struct efa_cq *cq = to_ecq(ibcq);

	ibdev_dbg(&dev->ibdev,
		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);

	rdma_user_mmap_entry_remove(cq->mmap_entry);
	efa_destroy_cq_idx(dev, cq->cq_idx);
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);
}

static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
				 struct efa_ibv_create_cq_resp *resp)
{
	resp->q_mmap_size = cq->size;
	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
						    virt_to_phys(cq->cpu_addr),
						    cq->size, EFA_MMAP_DMA_PAGE,
						    &resp->q_mmap_key);
	if (!cq->mmap_entry)
		return -ENOMEM;

	return 0;
}

int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		  struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct efa_ucontext, ibucontext);
	struct efa_ibv_create_cq_resp resp = {};
	struct efa_com_create_cq_params params;
	struct efa_com_create_cq_result result;
	struct ib_device *ibdev = ibcq->device;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_ibv_create_cq cmd = {};
	struct efa_cq *cq = to_ecq(ibcq);
	int entries = attr->cqe;
	int err;

	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);

	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
		ibdev_dbg(ibdev,
			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
			  entries, dev->dev_attr.max_cq_depth);
		err = -EINVAL;
		goto err_out;
	}

	if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, no input udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (udata->inlen > sizeof(cmd) &&
	    !ib_is_udata_cleared(udata, sizeof(cmd),
				 udata->inlen - sizeof(cmd))) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	err = ib_copy_from_udata(&cmd, udata,
				 min(sizeof(cmd), udata->inlen));
	if (err) {
		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
		goto err_out;
	}

	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
		ibdev_dbg(ibdev,
			  "Incompatible ABI params, unknown fields in udata\n");
		err = -EINVAL;
		goto err_out;
	}

	if (!cmd.cq_entry_size) {
		ibdev_dbg(ibdev,
			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
		err = -EINVAL;
		goto err_out;
	}

	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
		ibdev_dbg(ibdev,
			  "Invalid number of sub cqs[%u] expected[%u]\n",
			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
		err = -EINVAL;
		goto err_out;
	}

	cq->ucontext = ucontext;
	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
					 DMA_FROM_DEVICE);
	if (!cq->cpu_addr) {
		err = -ENOMEM;
		goto err_out;
	}

	params.uarn = cq->ucontext->uarn;
	params.cq_depth = entries;
	params.dma_addr = cq->dma_addr;
	params.entry_size_in_bytes = cmd.cq_entry_size;
	params.num_sub_cqs = cmd.num_sub_cqs;
	err = efa_com_create_cq(&dev->edev, &params, &result);
	if (err)
		goto err_free_mapped;

	resp.cq_idx = result.cq_idx;
	cq->cq_idx = result.cq_idx;
	cq->ibcq.cqe = result.actual_depth;
	WARN_ON_ONCE(entries != result.actual_depth);

	err = cq_mmap_entries_setup(dev, cq, &resp);
	if (err) {
		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
			  cq->cq_idx);
		goto err_destroy_cq;
	}

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(ibdev,
				  "Failed to copy udata for create_cq\n");
			goto err_remove_mmap;
		}
	}

	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);

	return 0;

err_remove_mmap:
	rdma_user_mmap_entry_remove(cq->mmap_entry);
err_destroy_cq:
	efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
			DMA_FROM_DEVICE);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_cq_err);
	return err;
}

static int umem_to_page_list(struct efa_dev *dev,
			     struct ib_umem *umem,
			     u64 *page_list,
			     u32 hp_cnt,
			     u8 hp_shift)
{
	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
	struct ib_block_iter biter;
	unsigned int hp_idx = 0;

	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
		  hp_cnt, pages_in_hp);

	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap,
			    BIT(hp_shift))
		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);

	return 0;
}

static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
{
	struct scatterlist *sglist;
	struct page *pg;
	int i;

	sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
	if (!sglist)
		return NULL;
	sg_init_table(sglist, page_cnt);
	for (i = 0; i < page_cnt; i++) {
		pg = vmalloc_to_page(buf);
		if (!pg)
			goto err;
		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
		buf += PAGE_SIZE / sizeof(*buf);
	}
	return sglist;

err:
	kfree(sglist);
	return NULL;
}

/*
 * create a chunk list of physical pages dma addresses from the supplied
 * scatter gather list
 */
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
	unsigned int chunk_list_size, chunk_idx, payload_idx;
	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
	struct efa_com_ctrl_buff_info *ctrl_buf;
	u64 *cur_chunk_buf, *prev_chunk_buf;
	struct ib_block_iter biter;
	dma_addr_t dma_addr;
	int i;

	/* allocate a chunk list that consists of 4KB chunks */
	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);

	chunk_list->size = chunk_list_size;
	chunk_list->chunks = kcalloc(chunk_list_size,
				     sizeof(*chunk_list->chunks),
				     GFP_KERNEL);
	if (!chunk_list->chunks)
		return -ENOMEM;

	ibdev_dbg(&dev->ibdev,
		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
		  page_cnt);

	/* allocate chunk buffers: */
	for (i = 0; i < chunk_list_size; i++) {
		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
		if (!chunk_list->chunks[i].buf)
			goto chunk_list_dealloc;

		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
	}
	chunk_list->chunks[chunk_list_size - 1].length =
		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
			EFA_CHUNK_PTR_SIZE;

	/* fill the dma addresses of sg list pages to chunks: */
	chunk_idx = 0;
	payload_idx = 0;
	cur_chunk_buf = chunk_list->chunks[0].buf;
	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
			    EFA_CHUNK_PAYLOAD_SIZE) {
		cur_chunk_buf[payload_idx++] =
			rdma_block_iter_dma_address(&biter);

		if (payload_idx == EFA_PTRS_PER_CHUNK) {
			chunk_idx++;
			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
			payload_idx = 0;
		}
	}

	/* map chunks to dma and fill chunks next ptrs */
	for (i = chunk_list_size - 1; i >= 0; i--) {
		dma_addr = dma_map_single(&dev->pdev->dev,
					  chunk_list->chunks[i].buf,
					  chunk_list->chunks[i].length,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
			ibdev_err(&dev->ibdev,
				  "chunk[%u] dma_map_failed\n", i);
			goto chunk_list_unmap;
		}

		chunk_list->chunks[i].dma_addr = dma_addr;
		ibdev_dbg(&dev->ibdev,
			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);

		if (!i)
			break;

		prev_chunk_buf = chunk_list->chunks[i - 1].buf;

		ctrl_buf = (struct efa_com_ctrl_buff_info *)
				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
		ctrl_buf->length = chunk_list->chunks[i].length;

		efa_com_set_dma_addr(dma_addr,
				     &ctrl_buf->address.mem_addr_high,
				     &ctrl_buf->address.mem_addr_low);
	}

	return 0;

chunk_list_unmap:
	for (; i < chunk_list_size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
	}
chunk_list_dealloc:
	for (i = 0; i < chunk_list_size; i++)
		kfree(chunk_list->chunks[i].buf);

	kfree(chunk_list->chunks);
	return -ENOMEM;
}

static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
	int i;

	for (i = 0; i < chunk_list->size; i++) {
		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
		kfree(chunk_list->chunks[i].buf);
	}

	kfree(chunk_list->chunks);
}

/* initialize pbl continuous mode: map pbl buffer to a dma address. */
static int pbl_continuous_initialize(struct efa_dev *dev,
				     struct pbl_context *pbl)
{
	dma_addr_t dma_addr;

	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
		return -ENOMEM;
	}

	pbl->phys.continuous.dma_addr = dma_addr;
	ibdev_dbg(&dev->ibdev,
		  "pbl continuous - dma_addr = %pad, size[%u]\n",
		  &dma_addr, pbl->pbl_buf_size_in_bytes);

	return 0;
}

/*
 * initialize pbl indirect mode:
 * create a chunk list out of the dma addresses of the physical pages of
 * pbl buffer.
 */
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
	struct scatterlist *sgl;
	int sg_dma_cnt, err;

	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
	if (!sgl)
		return -ENOMEM;

	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
	if (!sg_dma_cnt) {
		err = -EINVAL;
		goto err_map;
	}

	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
	pbl->phys.indirect.sgl = sgl;
	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
	err = pbl_chunk_list_create(dev, pbl);
	if (err) {
		ibdev_dbg(&dev->ibdev,
			  "chunk_list creation failed[%d]\n", err);
		goto err_chunk;
	}

	ibdev_dbg(&dev->ibdev,
		  "pbl indirect - size[%u], chunks[%u]\n",
		  pbl->pbl_buf_size_in_bytes,
		  pbl->phys.indirect.chunk_list.size);

	return 0;

err_chunk:
	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
err_map:
	kfree(sgl);
	return err;
}

static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
{
	pbl_chunk_list_destroy(dev, pbl);
	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
	kfree(pbl->phys.indirect.sgl);
}

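/*
 * kvzalloc() may return either physically contiguous (kmalloc) memory or
 * vmalloc memory; is_vmalloc_addr() below selects between the continuous and
 * indirect (chunk list) PBL submission modes accordingly.
 */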
/* create a page buffer list from a mapped user memory region */
static int pbl_create(struct efa_dev *dev,
		      struct pbl_context *pbl,
		      struct ib_umem *umem,
		      int hp_cnt,
		      u8 hp_shift)
{
	int err;

	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
	if (!pbl->pbl_buf)
		return -ENOMEM;

	if (is_vmalloc_addr(pbl->pbl_buf)) {
		pbl->physically_continuous = 0;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_indirect_initialize(dev, pbl);
		if (err)
			goto err_free;
	} else {
		pbl->physically_continuous = 1;
		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
					hp_shift);
		if (err)
			goto err_free;

		err = pbl_continuous_initialize(dev, pbl);
		if (err)
			goto err_free;
	}

	ibdev_dbg(&dev->ibdev,
		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
		  hp_cnt, pbl->physically_continuous);

	return 0;

err_free:
	kvfree(pbl->pbl_buf);
	return err;
}

static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
	if (pbl->physically_continuous)
		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
	else
		pbl_indirect_terminate(dev, pbl);

	kvfree(pbl->pbl_buf);
}

static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
				 struct efa_com_reg_mr_params *params)
{
	int err;

	params->inline_pbl = 1;
	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
				params->page_num, params->page_shift);
	if (err)
		return err;

	ibdev_dbg(&dev->ibdev,
		  "inline_pbl_array - pages[%u]\n", params->page_num);

	return 0;
}

static int efa_create_pbl(struct efa_dev *dev,
			  struct pbl_context *pbl,
			  struct efa_mr *mr,
			  struct efa_com_reg_mr_params *params)
{
	int err;

	err = pbl_create(dev, pbl, mr->umem, params->page_num,
			 params->page_shift);
	if (err) {
		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
		return err;
	}

	params->inline_pbl = 0;
	params->indirect = !pbl->physically_continuous;
	if (pbl->physically_continuous) {
		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;

		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	} else {
		params->pbl.pbl.length =
			pbl->phys.indirect.chunk_list.chunks[0].length;

		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
				     &params->pbl.pbl.address.mem_addr_high,
				     &params->pbl.pbl.address.mem_addr_low);
	}

	return 0;
}

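/*
 * Register a user memory region: pin it with ib_umem_get(), pick the best
 * supported page size, then submit the page list either inline in the admin
 * command (when it fits in inline_pbl_array) or through a separate PBL.
 */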
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
			 u64 virt_addr, int access_flags,
			 struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibpd->device);
	struct efa_com_reg_mr_params params = {};
	struct efa_com_reg_mr_result result = {};
	struct pbl_context pbl;
	int supp_access_flags;
	unsigned long pg_sz;
	struct efa_mr *mr;
	int inline_size;
	int err;

	if (udata && udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
		ibdev_dbg(&dev->ibdev,
			  "Incompatible ABI params, udata not cleared\n");
		err = -EINVAL;
		goto err_out;
	}

	supp_access_flags =
		IB_ACCESS_LOCAL_WRITE |
		(is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0);

	access_flags &= ~IB_ACCESS_OPTIONAL;
	if (access_flags & ~supp_access_flags) {
		ibdev_dbg(&dev->ibdev,
			  "Unsupported access flags[%#x], supported[%#x]\n",
			  access_flags, supp_access_flags);
		err = -EOPNOTSUPP;
		goto err_out;
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		err = -ENOMEM;
		goto err_out;
	}

	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		ibdev_dbg(&dev->ibdev,
			  "Failed to pin and map user space memory[%d]\n", err);
		goto err_free;
	}

	params.pd = to_epd(ibpd)->pdn;
	params.iova = virt_addr;
	params.mr_length_in_bytes = length;
	params.permissions = access_flags;

	pg_sz = ib_umem_find_best_pgsz(mr->umem,
				       dev->dev_attr.page_size_cap,
				       virt_addr);
	if (!pg_sz) {
		err = -EOPNOTSUPP;
		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
			  dev->dev_attr.page_size_cap);
		goto err_unmap;
	}

	params.page_shift = __ffs(pg_sz);
	params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)),
				       pg_sz);

	ibdev_dbg(&dev->ibdev,
		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
		  start, length, params.page_shift, params.page_num);

	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
	if (params.page_num <= inline_size) {
		err = efa_create_inline_pbl(dev, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		if (err)
			goto err_unmap;
	} else {
		err = efa_create_pbl(dev, &pbl, mr, &params);
		if (err)
			goto err_unmap;

		err = efa_com_register_mr(&dev->edev, &params, &result);
		pbl_destroy(dev, &pbl);

		if (err)
			goto err_unmap;
	}

	mr->ibmr.lkey = result.l_key;
	mr->ibmr.rkey = result.r_key;
	mr->ibmr.length = length;
	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);

	return &mr->ibmr;

err_unmap:
	ib_umem_release(mr->umem);
err_free:
	kfree(mr);
err_out:
	atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
	return ERR_PTR(err);
}

int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibmr->device);
	struct efa_com_dereg_mr_params params;
	struct efa_mr *mr = to_emr(ibmr);
	int err;

	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);

	params.l_key = mr->ibmr.lkey;
	err = efa_com_dereg_mr(&dev->edev, &params);
	if (err)
		return err;

	ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
			   struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err) {
		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
		return err;
	}

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;

	return 0;
}

static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
{
	struct efa_com_dealloc_uar_params params = {
		.uarn = uarn,
	};

	return efa_com_dealloc_uar(&dev->edev, &params);
}

int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	struct efa_ibv_alloc_ucontext_resp resp = {};
	struct efa_com_alloc_uar_result result;
	int err;

	/*
	 * it's fine if the driver does not know all request fields,
	 * we will ack input fields in our response.
	 */

	err = efa_com_alloc_uar(&dev->edev, &result);
	if (err)
		goto err_out;

	ucontext->uarn = result.uarn;

	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
	resp.max_llq_size = dev->dev_attr.max_llq_size;

	if (udata && udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err)
			goto err_dealloc_uar;
	}

	return 0;

err_dealloc_uar:
	efa_dealloc_uar(dev, result.uarn);
err_out:
	atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
	return err;
}

void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);

	efa_dealloc_uar(dev, ucontext->uarn);
}

void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);

	kfree(entry);
}

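/*
 * Resolve the mmap cookie to its entry and map it according to its type:
 * doorbell pages are mapped non-cached, LLQ descriptor memory is mapped
 * write-combined, and DMA buffers (CQ/RQ rings) are inserted page by page
 * with vm_insert_page().
 */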
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err) {
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);
	}

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_attr *ah_attr,
		  u32 flags,
		  struct ib_udata *udata)
{
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.sw_stats.create_ah_err);
	return err;
}

void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return;
	}

	efa_ah_destroy(dev, ah);
}

struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_stats_names,
					  ARRAY_SIZE(efa_stats_names),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u8 port_num, int index)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_dev *dev = to_edev(ibdev);
	struct efa_com_basic_stats *bs;
	struct efa_com_stats_admin *as;
	struct efa_stats *s;
	int err;

	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	as = &dev->edev.aq.stats;
	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	s = &dev->stats;
	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err);

	return ARRAY_SIZE(efa_stats_names);
}

enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u8 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}