/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016 QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"
#include "qedr_iw_cm.h"
#define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
#define RDMA_MAX_SGE_PER_SRQ	(4)
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

#define DB_ADDR_SHIFT(addr)	((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

enum {
	QEDR_USER_MMAP_IO_WC = 0,
	QEDR_USER_MMAP_PHYS_PAGE,
};
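
/* Copy a response structure back to user space, truncating the copy to
 * udata->outlen so that a shorter response buffer handed in by an older
 * user library is never overrun.
 */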
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
					size_t len)
{
	size_t min_len = min_t(size_t, len, udata->outlen);

	return ib_copy_to_udata(udata, src, min_len);
}
int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
		return -EINVAL;

	*pkey = QEDR_ROCE_PKEY_DEFAULT;
	return 0;
}
int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
		      int index, union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	memset(sgid->raw, 0, sizeof(sgid->raw));
	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return 0;
}
int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
	struct qedr_device_attr *qattr = &dev->attr;
	struct qedr_srq *srq = get_qedr_srq(ibsrq);

	srq_attr->srq_limit = srq->srq_limit;
	srq_attr->max_wr = qattr->max_srq_wr;
	srq_attr->max_sge = qattr->max_sge;

	return 0;
}
int qedr_query_device(struct ib_device *ibdev,
		      struct ib_device_attr *attr, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_device_attr *qattr = &dev->attr;

	if (!dev->rdma_ctx) {
		DP_ERR(dev,
		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
		       dev->rdma_ctx);
		return -EINVAL;
	}

	memset(attr, 0, sizeof(*attr));

	attr->fw_ver = qattr->fw_ver;
	attr->sys_image_guid = qattr->sys_image_guid;
	attr->max_mr_size = qattr->max_mr_size;
	attr->page_size_cap = qattr->page_size_caps;
	attr->vendor_id = qattr->vendor_id;
	attr->vendor_part_id = qattr->vendor_part_id;
	attr->hw_ver = qattr->hw_ver;
	attr->max_qp = qattr->max_qp;
	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
				 IB_DEVICE_RC_RNR_NAK_GEN |
				 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

	attr->max_send_sge = qattr->max_sge;
	attr->max_recv_sge = qattr->max_sge;
	attr->max_sge_rd = qattr->max_sge;
	attr->max_cq = qattr->max_cq;
	attr->max_cqe = qattr->max_cqe;
	attr->max_mr = qattr->max_mr;
	attr->max_mw = qattr->max_mw;
	attr->max_pd = qattr->max_pd;
	attr->atomic_cap = dev->atomic_cap;
	attr->max_fmr = qattr->max_fmr;
	attr->max_map_per_fmr = 16;
	attr->max_qp_init_rd_atom =
	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
	attr->max_qp_rd_atom =
	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
		attr->max_qp_init_rd_atom);

	attr->max_srq = qattr->max_srq;
	attr->max_srq_sge = qattr->max_srq_sge;
	attr->max_srq_wr = qattr->max_srq_wr;

	attr->local_ca_ack_delay = qattr->dev_ack_delay;
	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
	attr->max_ah = qattr->max_ah;

	return 0;
}
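
/* Translate the qed link speed (in Mbps) into the IB speed/width pair with
 * the same effective rate; unrecognized speeds fall back to SDR x1.
 */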
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
					    u8 *ib_width)
{
	switch (speed) {
	case 1000:
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 10000:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 20000:
		*ib_speed = IB_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;
	case 25000:
		*ib_speed = IB_SPEED_EDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 40000:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;
	case 50000:
		*ib_speed = IB_SPEED_HDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 100000:
		*ib_speed = IB_SPEED_EDR;
		*ib_width = IB_WIDTH_4X;
		break;
	default:
		/* Unsupported */
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}
int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
	struct qedr_dev *dev;
	struct qed_rdma_port *rdma_port;

	dev = get_qedr_dev(ibdev);

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "rdma_ctx is NULL\n");
		return -EINVAL;
	}

	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

	/* *attr is zeroed by the caller; avoid zeroing it here */
	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
		attr->state = IB_PORT_ACTIVE;
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	} else {
		attr->state = IB_PORT_DOWN;
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
	}
	attr->max_mtu = IB_MTU_4096;
	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
	attr->ip_gids = true;
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		attr->gid_tbl_len = 1;
		attr->pkey_tbl_len = 1;
	} else {
		attr->gid_tbl_len = QEDR_MAX_SGID;
		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
	}
	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
	attr->qkey_viol_cntr = 0;
	get_link_speed_and_width(rdma_port->link_speed,
				 &attr->active_speed, &attr->active_width);
	attr->max_msg_sz = rdma_port->max_msg_size;
	attr->max_vl_num = 4;

	return 0;
}
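
/* Per-process context setup: allocate a DPI (doorbell page) for the new
 * user context, publish it through an rdma_user_mmap entry so the user
 * library can map the write-combined doorbell window, and report the
 * device limits and DPM capabilities back in the response structure.
 */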
int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
	struct ib_device *ibdev = uctx->device;
	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
	struct qedr_alloc_ucontext_resp uresp = {};
	struct qedr_alloc_ucontext_req ureq = {};
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_add_user_out_params oparams;
	struct qedr_user_mmap_entry *entry;
	int rc;

	if (!udata)
		return -EFAULT;

	rc = ib_copy_from_udata(&ureq, udata,
				min(sizeof(ureq), udata->inlen));
	if (rc) {
		DP_ERR(dev, "Problem copying data from user space\n");
		return -EFAULT;
	}

	ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);

	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
	if (rc) {
		DP_ERR(dev,
		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
		       rc);
		return rc;
	}

	ctx->dpi = oparams.dpi;
	ctx->dpi_addr = oparams.dpi_addr;
	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
	ctx->dpi_size = oparams.dpi_size;
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry) {
		rc = -ENOMEM;
		goto err;
	}

	entry->io_address = ctx->dpi_phys_addr;
	entry->length = ctx->dpi_size;
	entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
	entry->dpi = ctx->dpi;

	rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
					 ctx->dpi_size);
	if (rc) {
		kfree(entry);
		goto err;
	}
	ctx->db_mmap_entry = &entry->rdma_entry;

	if (!dev->user_dpm_enabled)
		uresp.dpm_flags = 0;
	else if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
	else
		uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
				  QEDR_DPM_TYPE_ROCE_LEGACY;

	uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
	uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
	uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;

	uresp.wids_enabled = 1;
	uresp.wid_count = oparams.wid_count;
	uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
	uresp.db_size = ctx->dpi_size;
	uresp.max_send_wr = dev->attr.max_sqe;
	uresp.max_recv_wr = dev->attr.max_rqe;
	uresp.max_srq_wr = dev->attr.max_srq_wr;
	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
	uresp.max_cqes = QEDR_MAX_CQES;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		goto err;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
		 &ctx->ibucontext);
	return 0;

err:
	if (!ctx->db_mmap_entry)
		dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
	else
		rdma_user_mmap_entry_remove(ctx->db_mmap_entry);

	return rc;
}
void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);

	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
		 uctx);

	rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
}
void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
	struct qedr_dev *dev = entry->dev;

	if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
		free_page((unsigned long)entry->address);
	else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
		dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);

	kfree(entry);
}
int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct ib_device *dev = ucontext->device;
	size_t length = vma->vm_end - vma->vm_start;
	struct rdma_user_mmap_entry *rdma_entry;
	struct qedr_user_mmap_entry *entry;
	int rc = 0;
	u64 pfn;

	ibdev_dbg(dev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		return -EINVAL;
	}
	entry = get_qedr_mmap_entry(rdma_entry);
	ibdev_dbg(dev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->io_address, length, entry->mmap_flag);

	switch (entry->mmap_flag) {
	case QEDR_USER_MMAP_IO_WC:
		pfn = entry->io_address >> PAGE_SHIFT;
		rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
				       pgprot_writecombine(vma->vm_page_prot),
				       rdma_entry);
		break;
	case QEDR_USER_MMAP_PHYS_PAGE:
		rc = vm_insert_page(vma, vma->vm_start,
				    virt_to_page(entry->address));
		break;
	default:
		rc = -EINVAL;
	}

	if (rc)
		ibdev_dbg(dev,
			  "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			  entry->io_address, length, entry->mmap_flag, rc);

	rdma_user_mmap_entry_put(rdma_entry);
	return rc;
}
int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct ib_device *ibdev = ibpd->device;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	u16 pd_id;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
		 udata ? "User Lib" : "Kernel");

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "invalid RDMA context\n");
		return -EINVAL;
	}

	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
	if (rc)
		return rc;

	pd->pd_id = pd_id;

	if (udata) {
		struct qedr_alloc_pd_uresp uresp = {
			.pd_id = pd_id,
		};
		struct qedr_ucontext *context = rdma_udata_to_drv_context(
			udata, struct qedr_ucontext, ibucontext);

		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (rc) {
			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
			return rc;
		}

		pd->uctx = context;
	}

	return 0;
}
void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);

	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
}
static void qedr_free_pbl(struct qedr_dev *dev,
			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
	struct pci_dev *pdev = dev->pdev;
	int i;

	for (i = 0; i < pbl_info->num_pbls; i++) {
		if (!pbl[i].va)
			continue;
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl[i].va, pbl[i].pa);
	}

	kfree(pbl);
}
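
/* PBL (physical buffer list) handling: the firmware consumes queue and MR
 * page addresses through PBL pages of between 4K and 64K. When a single PBL
 * page cannot hold all of the PBEs, a two-layer layout is used in which the
 * first page points to the remaining PBL pages.
 */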
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
					   struct qedr_pbl_info *pbl_info,
					   gfp_t flags)
{
	struct pci_dev *pdev = dev->pdev;
	struct qedr_pbl *pbl_table;
	dma_addr_t *pbl_main_tbl;
	dma_addr_t pa;
	void *va;
	int i;

	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
	if (!pbl_table)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < pbl_info->num_pbls; i++) {
		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
					flags);
		if (!va)
			goto err;

		pbl_table[i].va = va;
		pbl_table[i].pa = pa;
	}

	/* Two-layer PBLs: if we have more than one pbl we need to initialize
	 * the first one with physical pointers to all of the rest.
	 */
	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
	for (i = 0; i < pbl_info->num_pbls - 1; i++)
		pbl_main_tbl[i] = pbl_table[i + 1].pa;

	return pbl_table;

err:
	for (i--; i >= 0; i--)
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl_table[i].va, pbl_table[i].pa);

	qedr_free_pbl(dev, pbl_info, pbl_table);

	return ERR_PTR(-ENOMEM);
}
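
/* Work out how many PBL pages are needed for num_pbes page entries and
 * whether a second layer is required; the result is stored in pbl_info for
 * the allocation and population steps that follow.
 */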
static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
				struct qedr_pbl_info *pbl_info,
				u32 num_pbes, int two_layer_capable)
{
	u32 pbl_capacity;
	u32 pbl_size;
	u32 num_pbls;

	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
		if (num_pbes > MAX_PBES_TWO_LAYER) {
			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
			       num_pbes);
			return -EINVAL;
		}

		/* calculate required pbl page size */
		pbl_size = MIN_FW_PBL_PAGE_SIZE;
		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
			       NUM_PBES_ON_PAGE(pbl_size);

		while (pbl_capacity < num_pbes) {
			pbl_size *= 2;
			pbl_capacity = pbl_size / sizeof(u64);
			pbl_capacity = pbl_capacity * pbl_capacity;
		}

		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
		num_pbls++;	/* One for the layer0 (points to the pbls) */
		pbl_info->two_layered = true;
	} else {
		/* One layered PBL */
		num_pbls = 1;
		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
				 roundup_pow_of_two((num_pbes * sizeof(u64))));
		pbl_info->two_layered = false;
	}

	pbl_info->num_pbls = num_pbls;
	pbl_info->pbl_size = pbl_size;
	pbl_info->num_pbes = num_pbes;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

	return 0;
}
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct sg_dma_page_iter sg_iter;
	struct regpair *pbe;
	u64 pg_addr;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two-layered pbl, the first pbl points to the rest
	 * of the pbls and the first entry lies on the second pbl in the table.
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	pbe_cnt = 0;

	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);

	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
		pg_addr = sg_page_iter_dma_address(&sg_iter);
		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
			pbe->lo = cpu_to_le32(pg_addr);
			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

			pg_addr += BIT(pg_shift);
			pbe_cnt++;
			total_num_pbes++;
			pbe++;

			if (total_num_pbes == pbl_info->num_pbes)
				return;

			/* If the given pbl is full storing the pbes,
			 * move to next pbl.
			 */
			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
				pbl_tbl++;
				pbe = (struct regpair *)pbl_tbl->va;
				pbe_cnt = 0;
			}

			fw_pg_cnt++;
		}
	}
}
static int qedr_db_recovery_add(struct qedr_dev *dev,
				void __iomem *db_addr,
				void *db_data,
				enum qed_db_rec_width db_width,
				enum qed_db_rec_space db_space)
{
	if (!db_data) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
		return 0;
	}

	return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
						 db_width, db_space);
}

static void qedr_db_recovery_del(struct qedr_dev *dev,
				 void __iomem *db_addr,
				 void *db_data)
{
	if (!db_data) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
		return;
	}

	/* Ignore the return code as there is not much we can do about it.
	 * The error log will be printed inside.
	 */
	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
}
static int qedr_copy_cq_uresp(struct qedr_dev *dev,
			      struct qedr_cq *cq, struct ib_udata *udata,
			      u32 db_offset)
{
	struct qedr_create_cq_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));

	uresp.db_offset = db_offset;
	uresp.icid = cq->icid;
	if (cq->q.db_mmap_entry)
		uresp.db_rec_addr =
			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

	return rc;
}
static void consume_cqe(struct qedr_cq *cq)
{
	if (cq->latest_cqe == cq->toggle_cqe)
		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

	cq->latest_cqe = qed_chain_consume(&cq->pbl);
}
static inline int qedr_align_cq_entries(int entries)
{
	u64 size, aligned_size;

	/* We allocate an extra entry that we don't report to the FW. */
	size = (entries + 1) * QEDR_CQE_SIZE;
	aligned_size = ALIGN(size, PAGE_SIZE);

	return aligned_size / QEDR_CQE_SIZE;
}
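
/* Doorbell recovery for user queues: hand the user library a zeroed kernel
 * page (exposed through its own mmap entry) where it mirrors the last
 * doorbell value it wrote, so the driver can re-ring it during recovery.
 */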
static int qedr_init_user_db_rec(struct ib_udata *udata,
				 struct qedr_dev *dev, struct qedr_userq *q,
				 bool requires_db_rec)
{
	struct qedr_ucontext *uctx =
		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
					  ibucontext);
	struct qedr_user_mmap_entry *entry;
	int rc;

	/* Abort for a non-doorbell user queue (SRQ) or a non-supporting lib */
	if (requires_db_rec == 0 || !uctx->db_rec)
		return 0;

	/* Allocate a page for doorbell recovery, add to mmap */
	q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
	if (!q->db_rec_data) {
		DP_ERR(dev, "get_zeroed_page failed\n");
		return -ENOMEM;
	}

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto err_free_db_data;

	entry->address = q->db_rec_data;
	entry->length = PAGE_SIZE;
	entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
	rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
					 &entry->rdma_entry,
					 PAGE_SIZE);
	if (rc) {
		kfree(entry);
		goto err_free_db_data;
	}

	q->db_mmap_entry = &entry->rdma_entry;

	return 0;

err_free_db_data:
	free_page((unsigned long)q->db_rec_data);
	q->db_rec_data = NULL;
	return -ENOMEM;
}
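
/* Pin the user buffer for a queue, build its PBL description and, unless the
 * caller defers it (iWARP QPs reuse firmware-provided PBL memory), allocate
 * and populate the PBL pages. Finally set up the doorbell recovery page.
 */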
static inline int qedr_init_user_queue(struct ib_udata *udata,
				       struct qedr_dev *dev,
				       struct qedr_userq *q, u64 buf_addr,
				       size_t buf_len, bool requires_db_rec,
				       int access, int alloc_and_init)
{
	u32 fw_pages;
	int rc;

	q->buf_addr = buf_addr;
	q->buf_len = buf_len;
	q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
	if (IS_ERR(q->umem)) {
		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
		       PTR_ERR(q->umem));
		return PTR_ERR(q->umem);
	}

	fw_pages = ib_umem_page_count(q->umem) <<
		   (PAGE_SHIFT - FW_PAGE_SHIFT);

	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
	if (rc)
		goto err0;

	if (alloc_and_init) {
		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
		if (IS_ERR(q->pbl_tbl)) {
			rc = PTR_ERR(q->pbl_tbl);
			goto err0;
		}
		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
				   FW_PAGE_SHIFT);
	} else {
		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
		if (!q->pbl_tbl) {
			rc = -ENOMEM;
			goto err0;
		}
	}

	/* mmap the user address used to store doorbell data for recovery */
	return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);

err0:
	ib_umem_release(q->umem);
826 static inline void qedr_init_cq_params(struct qedr_cq
*cq
,
827 struct qedr_ucontext
*ctx
,
828 struct qedr_dev
*dev
, int vector
,
829 int chain_entries
, int page_cnt
,
831 struct qed_rdma_create_cq_in_params
834 memset(params
, 0, sizeof(*params
));
835 params
->cq_handle_hi
= upper_32_bits((uintptr_t)cq
);
836 params
->cq_handle_lo
= lower_32_bits((uintptr_t)cq
);
837 params
->cnq_id
= vector
;
838 params
->cq_size
= chain_entries
- 1;
839 params
->dpi
= (ctx
) ? ctx
->dpi
: dev
->dpi
;
840 params
->pbl_num_pages
= page_cnt
;
841 params
->pbl_ptr
= pbl_ptr
;
842 params
->pbl_two_level
= 0;
845 static void doorbell_cq(struct qedr_cq
*cq
, u32 cons
, u8 flags
)
847 cq
->db
.data
.agg_flags
= flags
;
848 cq
->db
.data
.value
= cpu_to_le32(cons
);
849 writeq(cq
->db
.raw
, cq
->db_addr
);
852 int qedr_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags flags
)
854 struct qedr_cq
*cq
= get_qedr_cq(ibcq
);
855 unsigned long sflags
;
856 struct qedr_dev
*dev
;
858 dev
= get_qedr_dev(ibcq
->device
);
862 "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
868 if (cq
->cq_type
== QEDR_CQ_TYPE_GSI
)
871 spin_lock_irqsave(&cq
->cq_lock
, sflags
);
875 if (flags
& IB_CQ_SOLICITED
)
876 cq
->arm_flags
|= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD
;
878 if (flags
& IB_CQ_NEXT_COMP
)
879 cq
->arm_flags
|= DQ_UCM_ROCE_CQ_ARM_CF_CMD
;
881 doorbell_cq(cq
, cq
->cq_cons
- 1, cq
->arm_flags
);
883 spin_unlock_irqrestore(&cq
->cq_lock
, sflags
);
888 int qedr_create_cq(struct ib_cq
*ibcq
, const struct ib_cq_init_attr
*attr
,
889 struct ib_udata
*udata
)
891 struct ib_device
*ibdev
= ibcq
->device
;
892 struct qedr_ucontext
*ctx
= rdma_udata_to_drv_context(
893 udata
, struct qedr_ucontext
, ibucontext
);
894 struct qed_rdma_destroy_cq_out_params destroy_oparams
;
895 struct qed_rdma_destroy_cq_in_params destroy_iparams
;
896 struct qedr_dev
*dev
= get_qedr_dev(ibdev
);
897 struct qed_rdma_create_cq_in_params params
;
898 struct qedr_create_cq_ureq ureq
= {};
899 int vector
= attr
->comp_vector
;
900 int entries
= attr
->cqe
;
901 struct qedr_cq
*cq
= get_qedr_cq(ibcq
);
909 DP_DEBUG(dev
, QEDR_MSG_INIT
,
910 "create_cq: called from %s. entries=%d, vector=%d\n",
911 udata
? "User Lib" : "Kernel", entries
, vector
);
913 if (entries
> QEDR_MAX_CQES
) {
915 "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
916 entries
, QEDR_MAX_CQES
);
920 chain_entries
= qedr_align_cq_entries(entries
);
921 chain_entries
= min_t(int, chain_entries
, QEDR_MAX_CQES
);
923 /* calc db offset. user will add DPI base, kernel will add db addr */
924 db_offset
= DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT
);
927 if (ib_copy_from_udata(&ureq
, udata
, min(sizeof(ureq
),
930 "create cq: problem copying data from user space\n");
936 "create cq: cannot create a cq with 0 entries\n");
940 cq
->cq_type
= QEDR_CQ_TYPE_USER
;
942 rc
= qedr_init_user_queue(udata
, dev
, &cq
->q
, ureq
.addr
,
943 ureq
.len
, true, IB_ACCESS_LOCAL_WRITE
,
948 pbl_ptr
= cq
->q
.pbl_tbl
->pa
;
949 page_cnt
= cq
->q
.pbl_info
.num_pbes
;
951 cq
->ibcq
.cqe
= chain_entries
;
952 cq
->q
.db_addr
= ctx
->dpi_addr
+ db_offset
;
954 cq
->cq_type
= QEDR_CQ_TYPE_KERNEL
;
956 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
957 QED_CHAIN_USE_TO_CONSUME
,
959 QED_CHAIN_CNT_TYPE_U32
,
961 sizeof(union rdma_cqe
),
966 page_cnt
= qed_chain_get_page_cnt(&cq
->pbl
);
967 pbl_ptr
= qed_chain_get_pbl_phys(&cq
->pbl
);
968 cq
->ibcq
.cqe
= cq
->pbl
.capacity
;
971 qedr_init_cq_params(cq
, ctx
, dev
, vector
, chain_entries
, page_cnt
,
974 rc
= dev
->ops
->rdma_create_cq(dev
->rdma_ctx
, ¶ms
, &icid
);
979 cq
->sig
= QEDR_CQ_MAGIC_NUMBER
;
980 spin_lock_init(&cq
->cq_lock
);
983 rc
= qedr_copy_cq_uresp(dev
, cq
, udata
, db_offset
);
987 rc
= qedr_db_recovery_add(dev
, cq
->q
.db_addr
,
988 &cq
->q
.db_rec_data
->db_data
,
995 /* Generate doorbell address. */
996 cq
->db
.data
.icid
= cq
->icid
;
997 cq
->db_addr
= dev
->db_addr
+ db_offset
;
998 cq
->db
.data
.params
= DB_AGG_CMD_SET
<<
999 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT
;
1001 /* point to the very last element, passing it we will toggle */
1002 cq
->toggle_cqe
= qed_chain_get_last_elem(&cq
->pbl
);
1003 cq
->pbl_toggle
= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK
;
1004 cq
->latest_cqe
= NULL
;
1006 cq
->cq_cons
= qed_chain_get_cons_idx_u32(&cq
->pbl
);
1008 rc
= qedr_db_recovery_add(dev
, cq
->db_addr
, &cq
->db
.data
,
1009 DB_REC_WIDTH_64B
, DB_REC_KERNEL
);
1014 DP_DEBUG(dev
, QEDR_MSG_CQ
,
1015 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1016 cq
->icid
, cq
, params
.cq_size
);
1021 destroy_iparams
.icid
= cq
->icid
;
1022 dev
->ops
->rdma_destroy_cq(dev
->rdma_ctx
, &destroy_iparams
,
1026 qedr_free_pbl(dev
, &cq
->q
.pbl_info
, cq
->q
.pbl_tbl
);
1027 ib_umem_release(cq
->q
.umem
);
1028 if (cq
->q
.db_mmap_entry
)
1029 rdma_user_mmap_entry_remove(cq
->q
.db_mmap_entry
);
1031 dev
->ops
->common
->chain_free(dev
->cdev
, &cq
->pbl
);
1037 int qedr_resize_cq(struct ib_cq
*ibcq
, int new_cnt
, struct ib_udata
*udata
)
1039 struct qedr_dev
*dev
= get_qedr_dev(ibcq
->device
);
1040 struct qedr_cq
*cq
= get_qedr_cq(ibcq
);
1042 DP_ERR(dev
, "cq %p RESIZE NOT SUPPORTED\n", cq
);
1047 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
1048 #define QEDR_DESTROY_CQ_ITER_DURATION (10)
1050 void qedr_destroy_cq(struct ib_cq
*ibcq
, struct ib_udata
*udata
)
1052 struct qedr_dev
*dev
= get_qedr_dev(ibcq
->device
);
1053 struct qed_rdma_destroy_cq_out_params oparams
;
1054 struct qed_rdma_destroy_cq_in_params iparams
;
1055 struct qedr_cq
*cq
= get_qedr_cq(ibcq
);
1058 DP_DEBUG(dev
, QEDR_MSG_CQ
, "destroy cq %p (icid=%d)\n", cq
, cq
->icid
);
	/* GSI CQs are handled by the driver, so they don't exist in the FW */
1063 if (cq
->cq_type
== QEDR_CQ_TYPE_GSI
) {
1064 qedr_db_recovery_del(dev
, cq
->db_addr
, &cq
->db
.data
);
1068 iparams
.icid
= cq
->icid
;
1069 dev
->ops
->rdma_destroy_cq(dev
->rdma_ctx
, &iparams
, &oparams
);
1070 dev
->ops
->common
->chain_free(dev
->cdev
, &cq
->pbl
);
1073 qedr_free_pbl(dev
, &cq
->q
.pbl_info
, cq
->q
.pbl_tbl
);
1074 ib_umem_release(cq
->q
.umem
);
1076 if (cq
->q
.db_rec_data
) {
1077 qedr_db_recovery_del(dev
, cq
->q
.db_addr
,
1078 &cq
->q
.db_rec_data
->db_data
);
1079 rdma_user_mmap_entry_remove(cq
->q
.db_mmap_entry
);
1082 qedr_db_recovery_del(dev
, cq
->db_addr
, &cq
->db
.data
);
	/* We don't want the IRQ handler to handle a non-existing CQ so we
	 * wait until all CNQ interrupts, if any, are received. This will always
	 * happen and will always happen very fast. If not, then a serious error
	 * has occurred. That is why we can use a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled in that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification is increased in qedr_irq_handler().
	 */
1096 iter
= QEDR_DESTROY_CQ_MAX_ITERATIONS
;
1097 while (oparams
.num_cq_notif
!= READ_ONCE(cq
->cnq_notif
) && iter
) {
1098 udelay(QEDR_DESTROY_CQ_ITER_DURATION
);
1102 iter
= QEDR_DESTROY_CQ_MAX_ITERATIONS
;
1103 while (oparams
.num_cq_notif
!= READ_ONCE(cq
->cnq_notif
) && iter
) {
1104 msleep(QEDR_DESTROY_CQ_ITER_DURATION
);
	/* Note that we don't need to have explicit code to wait for the
	 * completion of the event handler because it is invoked from the EQ.
	 * Since the destroy CQ ramrod has also been received on the EQ we can
	 * be certain that there's no event handler in progress.
	 */
1115 static inline int get_gid_info_from_table(struct ib_qp
*ibqp
,
1116 struct ib_qp_attr
*attr
,
1118 struct qed_rdma_modify_qp_in_params
1121 const struct ib_gid_attr
*gid_attr
;
1122 enum rdma_network_type nw_type
;
1123 const struct ib_global_route
*grh
= rdma_ah_read_grh(&attr
->ah_attr
);
1128 gid_attr
= grh
->sgid_attr
;
1129 ret
= rdma_read_gid_l2_fields(gid_attr
, &qp_params
->vlan_id
, NULL
);
1133 nw_type
= rdma_gid_attr_network_type(gid_attr
);
1135 case RDMA_NETWORK_IPV6
:
1136 memcpy(&qp_params
->sgid
.bytes
[0], &gid_attr
->gid
.raw
[0],
1137 sizeof(qp_params
->sgid
));
1138 memcpy(&qp_params
->dgid
.bytes
[0],
1140 sizeof(qp_params
->dgid
));
1141 qp_params
->roce_mode
= ROCE_V2_IPV6
;
1142 SET_FIELD(qp_params
->modify_flags
,
1143 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE
, 1);
1145 case RDMA_NETWORK_IB
:
1146 memcpy(&qp_params
->sgid
.bytes
[0], &gid_attr
->gid
.raw
[0],
1147 sizeof(qp_params
->sgid
));
1148 memcpy(&qp_params
->dgid
.bytes
[0],
1150 sizeof(qp_params
->dgid
));
1151 qp_params
->roce_mode
= ROCE_V1
;
1153 case RDMA_NETWORK_IPV4
:
1154 memset(&qp_params
->sgid
, 0, sizeof(qp_params
->sgid
));
1155 memset(&qp_params
->dgid
, 0, sizeof(qp_params
->dgid
));
1156 ipv4_addr
= qedr_get_ipv4_from_gid(gid_attr
->gid
.raw
);
1157 qp_params
->sgid
.ipv4_addr
= ipv4_addr
;
1159 qedr_get_ipv4_from_gid(grh
->dgid
.raw
);
1160 qp_params
->dgid
.ipv4_addr
= ipv4_addr
;
1161 SET_FIELD(qp_params
->modify_flags
,
1162 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE
, 1);
1163 qp_params
->roce_mode
= ROCE_V2_IPV4
;
1167 for (i
= 0; i
< 4; i
++) {
1168 qp_params
->sgid
.dwords
[i
] = ntohl(qp_params
->sgid
.dwords
[i
]);
1169 qp_params
->dgid
.dwords
[i
] = ntohl(qp_params
->dgid
.dwords
[i
]);
1172 if (qp_params
->vlan_id
>= VLAN_CFI_MASK
)
1173 qp_params
->vlan_id
= 0;
1178 static int qedr_check_qp_attrs(struct ib_pd
*ibpd
, struct qedr_dev
*dev
,
1179 struct ib_qp_init_attr
*attrs
,
1180 struct ib_udata
*udata
)
1182 struct qedr_device_attr
*qattr
= &dev
->attr
;
1184 /* QP0... attrs->qp_type == IB_QPT_GSI */
1185 if (attrs
->qp_type
!= IB_QPT_RC
&& attrs
->qp_type
!= IB_QPT_GSI
) {
1186 DP_DEBUG(dev
, QEDR_MSG_QP
,
1187 "create qp: unsupported qp type=0x%x requested\n",
1192 if (attrs
->cap
.max_send_wr
> qattr
->max_sqe
) {
1194 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1195 attrs
->cap
.max_send_wr
, qattr
->max_sqe
);
1199 if (attrs
->cap
.max_inline_data
> qattr
->max_inline
) {
1201 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1202 attrs
->cap
.max_inline_data
, qattr
->max_inline
);
1206 if (attrs
->cap
.max_send_sge
> qattr
->max_sge
) {
1208 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1209 attrs
->cap
.max_send_sge
, qattr
->max_sge
);
1213 if (attrs
->cap
.max_recv_sge
> qattr
->max_sge
) {
1215 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1216 attrs
->cap
.max_recv_sge
, qattr
->max_sge
);
1220 /* Unprivileged user space cannot create special QP */
1221 if (udata
&& attrs
->qp_type
== IB_QPT_GSI
) {
1223 "create qp: userspace can't create special QPs of type=0x%x\n",
1231 static int qedr_copy_srq_uresp(struct qedr_dev
*dev
,
1232 struct qedr_srq
*srq
, struct ib_udata
*udata
)
1234 struct qedr_create_srq_uresp uresp
= {};
1237 uresp
.srq_id
= srq
->srq_id
;
1239 rc
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
1241 DP_ERR(dev
, "create srq: problem copying data to user space\n");
1246 static void qedr_copy_rq_uresp(struct qedr_dev
*dev
,
1247 struct qedr_create_qp_uresp
*uresp
,
1250 /* iWARP requires two doorbells per RQ. */
1251 if (rdma_protocol_iwarp(&dev
->ibdev
, 1)) {
1252 uresp
->rq_db_offset
=
1253 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD
);
1254 uresp
->rq_db2_offset
= DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS
);
1256 uresp
->rq_db_offset
=
1257 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD
);
1260 uresp
->rq_icid
= qp
->icid
;
1261 if (qp
->urq
.db_mmap_entry
)
1262 uresp
->rq_db_rec_addr
=
1263 rdma_user_mmap_get_offset(qp
->urq
.db_mmap_entry
);
1266 static void qedr_copy_sq_uresp(struct qedr_dev
*dev
,
1267 struct qedr_create_qp_uresp
*uresp
,
1270 uresp
->sq_db_offset
= DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD
);
1272 /* iWARP uses the same cid for rq and sq */
1273 if (rdma_protocol_iwarp(&dev
->ibdev
, 1))
1274 uresp
->sq_icid
= qp
->icid
;
1276 uresp
->sq_icid
= qp
->icid
+ 1;
1278 if (qp
->usq
.db_mmap_entry
)
1279 uresp
->sq_db_rec_addr
=
1280 rdma_user_mmap_get_offset(qp
->usq
.db_mmap_entry
);
1283 static int qedr_copy_qp_uresp(struct qedr_dev
*dev
,
1284 struct qedr_qp
*qp
, struct ib_udata
*udata
,
1285 struct qedr_create_qp_uresp
*uresp
)
1289 memset(uresp
, 0, sizeof(*uresp
));
1290 qedr_copy_sq_uresp(dev
, uresp
, qp
);
1291 qedr_copy_rq_uresp(dev
, uresp
, qp
);
1293 uresp
->atomic_supported
= dev
->atomic_cap
!= IB_ATOMIC_NONE
;
1294 uresp
->qp_id
= qp
->qp_id
;
1296 rc
= qedr_ib_copy_to_udata(udata
, uresp
, sizeof(*uresp
));
1299 "create qp: failed a copy to user space with qp icid=0x%x.\n",
1305 static void qedr_set_common_qp_params(struct qedr_dev
*dev
,
1308 struct ib_qp_init_attr
*attrs
)
1310 spin_lock_init(&qp
->q_lock
);
1311 if (rdma_protocol_iwarp(&dev
->ibdev
, 1)) {
1312 kref_init(&qp
->refcnt
);
1313 init_completion(&qp
->iwarp_cm_comp
);
1316 qp
->qp_type
= attrs
->qp_type
;
1317 qp
->max_inline_data
= attrs
->cap
.max_inline_data
;
1318 qp
->sq
.max_sges
= attrs
->cap
.max_send_sge
;
1319 qp
->state
= QED_ROCE_QP_STATE_RESET
;
1320 qp
->signaled
= (attrs
->sq_sig_type
== IB_SIGNAL_ALL_WR
) ? true : false;
1321 qp
->sq_cq
= get_qedr_cq(attrs
->send_cq
);
1325 qp
->srq
= get_qedr_srq(attrs
->srq
);
1327 qp
->rq_cq
= get_qedr_cq(attrs
->recv_cq
);
1328 qp
->rq
.max_sges
= attrs
->cap
.max_recv_sge
;
1329 DP_DEBUG(dev
, QEDR_MSG_QP
,
1330 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1331 qp
->rq
.max_sges
, qp
->rq_cq
->icid
);
1334 DP_DEBUG(dev
, QEDR_MSG_QP
,
1335 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1336 pd
->pd_id
, qp
->qp_type
, qp
->max_inline_data
,
1337 qp
->state
, qp
->signaled
, (attrs
->srq
) ? 1 : 0);
1338 DP_DEBUG(dev
, QEDR_MSG_QP
,
1339 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1340 qp
->sq
.max_sges
, qp
->sq_cq
->icid
);
1343 static int qedr_set_roce_db_info(struct qedr_dev
*dev
, struct qedr_qp
*qp
)
1347 qp
->sq
.db
= dev
->db_addr
+
1348 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD
);
1349 qp
->sq
.db_data
.data
.icid
= qp
->icid
+ 1;
1350 rc
= qedr_db_recovery_add(dev
, qp
->sq
.db
,
1358 qp
->rq
.db
= dev
->db_addr
+
1359 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD
);
1360 qp
->rq
.db_data
.data
.icid
= qp
->icid
;
1362 rc
= qedr_db_recovery_add(dev
, qp
->rq
.db
,
1367 qedr_db_recovery_del(dev
, qp
->sq
.db
,
1374 static int qedr_check_srq_params(struct qedr_dev
*dev
,
1375 struct ib_srq_init_attr
*attrs
,
1376 struct ib_udata
*udata
)
1378 struct qedr_device_attr
*qattr
= &dev
->attr
;
1380 if (attrs
->attr
.max_wr
> qattr
->max_srq_wr
) {
1382 "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1383 attrs
->attr
.max_wr
, qattr
->max_srq_wr
);
1387 if (attrs
->attr
.max_sge
> qattr
->max_sge
) {
1389 "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1390 attrs
->attr
.max_sge
, qattr
->max_sge
);
1397 static void qedr_free_srq_user_params(struct qedr_srq
*srq
)
1399 qedr_free_pbl(srq
->dev
, &srq
->usrq
.pbl_info
, srq
->usrq
.pbl_tbl
);
1400 ib_umem_release(srq
->usrq
.umem
);
1401 ib_umem_release(srq
->prod_umem
);
1404 static void qedr_free_srq_kernel_params(struct qedr_srq
*srq
)
1406 struct qedr_srq_hwq_info
*hw_srq
= &srq
->hw_srq
;
1407 struct qedr_dev
*dev
= srq
->dev
;
1409 dev
->ops
->common
->chain_free(dev
->cdev
, &hw_srq
->pbl
);
1411 dma_free_coherent(&dev
->pdev
->dev
, sizeof(struct rdma_srq_producers
),
1412 hw_srq
->virt_prod_pair_addr
,
1413 hw_srq
->phy_prod_pair_addr
);
1416 static int qedr_init_srq_user_params(struct ib_udata
*udata
,
1417 struct qedr_srq
*srq
,
1418 struct qedr_create_srq_ureq
*ureq
,
1421 struct scatterlist
*sg
;
1424 rc
= qedr_init_user_queue(udata
, srq
->dev
, &srq
->usrq
, ureq
->srq_addr
,
1425 ureq
->srq_len
, false, access
, 1);
1429 srq
->prod_umem
= ib_umem_get(srq
->ibsrq
.device
, ureq
->prod_pair_addr
,
1430 sizeof(struct rdma_srq_producers
), access
);
1431 if (IS_ERR(srq
->prod_umem
)) {
1432 qedr_free_pbl(srq
->dev
, &srq
->usrq
.pbl_info
, srq
->usrq
.pbl_tbl
);
1433 ib_umem_release(srq
->usrq
.umem
);
1435 "create srq: failed ib_umem_get for producer, got %ld\n",
1436 PTR_ERR(srq
->prod_umem
));
1437 return PTR_ERR(srq
->prod_umem
);
1440 sg
= srq
->prod_umem
->sg_head
.sgl
;
1441 srq
->hw_srq
.phy_prod_pair_addr
= sg_dma_address(sg
);
1446 static int qedr_alloc_srq_kernel_params(struct qedr_srq
*srq
,
1447 struct qedr_dev
*dev
,
1448 struct ib_srq_init_attr
*init_attr
)
1450 struct qedr_srq_hwq_info
*hw_srq
= &srq
->hw_srq
;
1451 dma_addr_t phy_prod_pair_addr
;
1456 va
= dma_alloc_coherent(&dev
->pdev
->dev
,
1457 sizeof(struct rdma_srq_producers
),
1458 &phy_prod_pair_addr
, GFP_KERNEL
);
1461 "create srq: failed to allocate dma memory for producer\n");
1465 hw_srq
->phy_prod_pair_addr
= phy_prod_pair_addr
;
1466 hw_srq
->virt_prod_pair_addr
= va
;
1468 num_elems
= init_attr
->attr
.max_wr
* RDMA_MAX_SRQ_WQE_SIZE
;
1469 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
1470 QED_CHAIN_USE_TO_CONSUME_PRODUCE
,
1472 QED_CHAIN_CNT_TYPE_U32
,
1474 QEDR_SRQ_WQE_ELEM_SIZE
,
1475 &hw_srq
->pbl
, NULL
);
1479 hw_srq
->num_elems
= num_elems
;
1484 dma_free_coherent(&dev
->pdev
->dev
, sizeof(struct rdma_srq_producers
),
1485 va
, phy_prod_pair_addr
);
1489 int qedr_create_srq(struct ib_srq
*ibsrq
, struct ib_srq_init_attr
*init_attr
,
1490 struct ib_udata
*udata
)
1492 struct qed_rdma_destroy_srq_in_params destroy_in_params
;
1493 struct qed_rdma_create_srq_in_params in_params
= {};
1494 struct qedr_dev
*dev
= get_qedr_dev(ibsrq
->device
);
1495 struct qed_rdma_create_srq_out_params out_params
;
1496 struct qedr_pd
*pd
= get_qedr_pd(ibsrq
->pd
);
1497 struct qedr_create_srq_ureq ureq
= {};
1498 u64 pbl_base_addr
, phy_prod_pair_addr
;
1499 struct qedr_srq_hwq_info
*hw_srq
;
1500 u32 page_cnt
, page_size
;
1501 struct qedr_srq
*srq
= get_qedr_srq(ibsrq
);
1504 DP_DEBUG(dev
, QEDR_MSG_QP
,
1505 "create SRQ called from %s (pd %p)\n",
1506 (udata
) ? "User lib" : "kernel", pd
);
1508 rc
= qedr_check_srq_params(dev
, init_attr
, udata
);
1513 hw_srq
= &srq
->hw_srq
;
1514 spin_lock_init(&srq
->lock
);
1516 hw_srq
->max_wr
= init_attr
->attr
.max_wr
;
1517 hw_srq
->max_sges
= init_attr
->attr
.max_sge
;
1520 if (ib_copy_from_udata(&ureq
, udata
, min(sizeof(ureq
),
1523 "create srq: problem copying data from user space\n");
1527 rc
= qedr_init_srq_user_params(udata
, srq
, &ureq
, 0);
1531 page_cnt
= srq
->usrq
.pbl_info
.num_pbes
;
1532 pbl_base_addr
= srq
->usrq
.pbl_tbl
->pa
;
1533 phy_prod_pair_addr
= hw_srq
->phy_prod_pair_addr
;
1534 page_size
= PAGE_SIZE
;
1536 struct qed_chain
*pbl
;
1538 rc
= qedr_alloc_srq_kernel_params(srq
, dev
, init_attr
);
1543 page_cnt
= qed_chain_get_page_cnt(pbl
);
1544 pbl_base_addr
= qed_chain_get_pbl_phys(pbl
);
1545 phy_prod_pair_addr
= hw_srq
->phy_prod_pair_addr
;
1546 page_size
= QED_CHAIN_PAGE_SIZE
;
1549 in_params
.pd_id
= pd
->pd_id
;
1550 in_params
.pbl_base_addr
= pbl_base_addr
;
1551 in_params
.prod_pair_addr
= phy_prod_pair_addr
;
1552 in_params
.num_pages
= page_cnt
;
1553 in_params
.page_size
= page_size
;
1555 rc
= dev
->ops
->rdma_create_srq(dev
->rdma_ctx
, &in_params
, &out_params
);
1559 srq
->srq_id
= out_params
.srq_id
;
1562 rc
= qedr_copy_srq_uresp(dev
, srq
, udata
);
1567 rc
= xa_insert_irq(&dev
->srqs
, srq
->srq_id
, srq
, GFP_KERNEL
);
1571 DP_DEBUG(dev
, QEDR_MSG_SRQ
,
1572 "create srq: created srq with srq_id=0x%0x\n", srq
->srq_id
);
1576 destroy_in_params
.srq_id
= srq
->srq_id
;
1578 dev
->ops
->rdma_destroy_srq(dev
->rdma_ctx
, &destroy_in_params
);
1581 qedr_free_srq_user_params(srq
);
1583 qedr_free_srq_kernel_params(srq
);
1588 void qedr_destroy_srq(struct ib_srq
*ibsrq
, struct ib_udata
*udata
)
1590 struct qed_rdma_destroy_srq_in_params in_params
= {};
1591 struct qedr_dev
*dev
= get_qedr_dev(ibsrq
->device
);
1592 struct qedr_srq
*srq
= get_qedr_srq(ibsrq
);
1594 xa_erase_irq(&dev
->srqs
, srq
->srq_id
);
1595 in_params
.srq_id
= srq
->srq_id
;
1596 dev
->ops
->rdma_destroy_srq(dev
->rdma_ctx
, &in_params
);
1599 qedr_free_srq_user_params(srq
);
1601 qedr_free_srq_kernel_params(srq
);
1603 DP_DEBUG(dev
, QEDR_MSG_SRQ
,
1604 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1608 int qedr_modify_srq(struct ib_srq
*ibsrq
, struct ib_srq_attr
*attr
,
1609 enum ib_srq_attr_mask attr_mask
, struct ib_udata
*udata
)
1611 struct qed_rdma_modify_srq_in_params in_params
= {};
1612 struct qedr_dev
*dev
= get_qedr_dev(ibsrq
->device
);
1613 struct qedr_srq
*srq
= get_qedr_srq(ibsrq
);
1616 if (attr_mask
& IB_SRQ_MAX_WR
) {
1618 "modify srq: invalid attribute mask=0x%x specified for %p\n",
1623 if (attr_mask
& IB_SRQ_LIMIT
) {
1624 if (attr
->srq_limit
>= srq
->hw_srq
.max_wr
) {
1626 "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1627 attr
->srq_limit
, srq
->hw_srq
.max_wr
);
1631 in_params
.srq_id
= srq
->srq_id
;
1632 in_params
.wqe_limit
= attr
->srq_limit
;
1633 rc
= dev
->ops
->rdma_modify_srq(dev
->rdma_ctx
, &in_params
);
1638 srq
->srq_limit
= attr
->srq_limit
;
1640 DP_DEBUG(dev
, QEDR_MSG_SRQ
,
1641 "modify srq: modified srq with srq_id=0x%0x\n", srq
->srq_id
);
1647 qedr_init_common_qp_in_params(struct qedr_dev
*dev
,
1650 struct ib_qp_init_attr
*attrs
,
1651 bool fmr_and_reserved_lkey
,
1652 struct qed_rdma_create_qp_in_params
*params
)
1654 /* QP handle to be written in an async event */
1655 params
->qp_handle_async_lo
= lower_32_bits((uintptr_t) qp
);
1656 params
->qp_handle_async_hi
= upper_32_bits((uintptr_t) qp
);
1658 params
->signal_all
= (attrs
->sq_sig_type
== IB_SIGNAL_ALL_WR
);
1659 params
->fmr_and_reserved_lkey
= fmr_and_reserved_lkey
;
1660 params
->pd
= pd
->pd_id
;
1661 params
->dpi
= pd
->uctx
? pd
->uctx
->dpi
: dev
->dpi
;
1662 params
->sq_cq_id
= get_qedr_cq(attrs
->send_cq
)->icid
;
1663 params
->stats_queue
= 0;
1665 params
->use_srq
= false;
1668 params
->rq_cq_id
= get_qedr_cq(attrs
->recv_cq
)->icid
;
1671 params
->rq_cq_id
= get_qedr_cq(attrs
->recv_cq
)->icid
;
1672 params
->srq_id
= qp
->srq
->srq_id
;
1673 params
->use_srq
= true;
1677 static inline void qedr_qp_user_print(struct qedr_dev
*dev
, struct qedr_qp
*qp
)
1679 DP_DEBUG(dev
, QEDR_MSG_QP
, "create qp: successfully created user QP. "
1688 qp
->usq
.buf_len
, qp
->urq
.buf_addr
, qp
->urq
.buf_len
);
1692 qedr_iwarp_populate_user_qp(struct qedr_dev
*dev
,
1694 struct qed_rdma_create_qp_out_params
*out_params
)
1696 qp
->usq
.pbl_tbl
->va
= out_params
->sq_pbl_virt
;
1697 qp
->usq
.pbl_tbl
->pa
= out_params
->sq_pbl_phys
;
1699 qedr_populate_pbls(dev
, qp
->usq
.umem
, qp
->usq
.pbl_tbl
,
1700 &qp
->usq
.pbl_info
, FW_PAGE_SHIFT
);
1702 qp
->urq
.pbl_tbl
->va
= out_params
->rq_pbl_virt
;
1703 qp
->urq
.pbl_tbl
->pa
= out_params
->rq_pbl_phys
;
1706 qedr_populate_pbls(dev
, qp
->urq
.umem
, qp
->urq
.pbl_tbl
,
1707 &qp
->urq
.pbl_info
, FW_PAGE_SHIFT
);
1710 static void qedr_cleanup_user(struct qedr_dev
*dev
,
1711 struct qedr_ucontext
*ctx
,
1714 ib_umem_release(qp
->usq
.umem
);
1715 qp
->usq
.umem
= NULL
;
1717 ib_umem_release(qp
->urq
.umem
);
1718 qp
->urq
.umem
= NULL
;
1720 if (rdma_protocol_roce(&dev
->ibdev
, 1)) {
1721 qedr_free_pbl(dev
, &qp
->usq
.pbl_info
, qp
->usq
.pbl_tbl
);
1722 qedr_free_pbl(dev
, &qp
->urq
.pbl_info
, qp
->urq
.pbl_tbl
);
1724 kfree(qp
->usq
.pbl_tbl
);
1725 kfree(qp
->urq
.pbl_tbl
);
1728 if (qp
->usq
.db_rec_data
) {
1729 qedr_db_recovery_del(dev
, qp
->usq
.db_addr
,
1730 &qp
->usq
.db_rec_data
->db_data
);
1731 rdma_user_mmap_entry_remove(qp
->usq
.db_mmap_entry
);
1734 if (qp
->urq
.db_rec_data
) {
1735 qedr_db_recovery_del(dev
, qp
->urq
.db_addr
,
1736 &qp
->urq
.db_rec_data
->db_data
);
1737 rdma_user_mmap_entry_remove(qp
->urq
.db_mmap_entry
);
1740 if (rdma_protocol_iwarp(&dev
->ibdev
, 1))
1741 qedr_db_recovery_del(dev
, qp
->urq
.db_rec_db2_addr
,
1742 &qp
->urq
.db_rec_db2_data
);
1745 static int qedr_create_user_qp(struct qedr_dev
*dev
,
1748 struct ib_udata
*udata
,
1749 struct ib_qp_init_attr
*attrs
)
1751 struct qed_rdma_create_qp_in_params in_params
;
1752 struct qed_rdma_create_qp_out_params out_params
;
1753 struct qedr_pd
*pd
= get_qedr_pd(ibpd
);
1754 struct qedr_create_qp_uresp uresp
;
1755 struct qedr_ucontext
*ctx
= NULL
;
1756 struct qedr_create_qp_ureq ureq
;
1757 int alloc_and_init
= rdma_protocol_roce(&dev
->ibdev
, 1);
1760 qp
->create_type
= QEDR_QP_CREATE_USER
;
1761 memset(&ureq
, 0, sizeof(ureq
));
1762 rc
= ib_copy_from_udata(&ureq
, udata
, min(sizeof(ureq
), udata
->inlen
));
1764 DP_ERR(dev
, "Problem copying data from user space\n");
1768 /* SQ - read access only (0) */
1769 rc
= qedr_init_user_queue(udata
, dev
, &qp
->usq
, ureq
.sq_addr
,
1770 ureq
.sq_len
, true, 0, alloc_and_init
);
1775 /* RQ - read access only (0) */
1776 rc
= qedr_init_user_queue(udata
, dev
, &qp
->urq
, ureq
.rq_addr
,
1777 ureq
.rq_len
, true, 0, alloc_and_init
);
1782 memset(&in_params
, 0, sizeof(in_params
));
1783 qedr_init_common_qp_in_params(dev
, pd
, qp
, attrs
, false, &in_params
);
1784 in_params
.qp_handle_lo
= ureq
.qp_handle_lo
;
1785 in_params
.qp_handle_hi
= ureq
.qp_handle_hi
;
1786 in_params
.sq_num_pages
= qp
->usq
.pbl_info
.num_pbes
;
1787 in_params
.sq_pbl_ptr
= qp
->usq
.pbl_tbl
->pa
;
1789 in_params
.rq_num_pages
= qp
->urq
.pbl_info
.num_pbes
;
1790 in_params
.rq_pbl_ptr
= qp
->urq
.pbl_tbl
->pa
;
1793 qp
->qed_qp
= dev
->ops
->rdma_create_qp(dev
->rdma_ctx
,
1794 &in_params
, &out_params
);
1801 if (rdma_protocol_iwarp(&dev
->ibdev
, 1))
1802 qedr_iwarp_populate_user_qp(dev
, qp
, &out_params
);
1804 qp
->qp_id
= out_params
.qp_id
;
1805 qp
->icid
= out_params
.icid
;
1807 rc
= qedr_copy_qp_uresp(dev
, qp
, udata
, &uresp
);
1811 /* db offset was calculated in copy_qp_uresp, now set in the user q */
1813 qp
->usq
.db_addr
= ctx
->dpi_addr
+ uresp
.sq_db_offset
;
1814 qp
->urq
.db_addr
= ctx
->dpi_addr
+ uresp
.rq_db_offset
;
1816 if (rdma_protocol_iwarp(&dev
->ibdev
, 1)) {
1817 qp
->urq
.db_rec_db2_addr
= ctx
->dpi_addr
+ uresp
.rq_db2_offset
;
1819 /* calculate the db_rec_db2 data since it is constant so no
1820 * need to reflect from user
1822 qp
->urq
.db_rec_db2_data
.data
.icid
= cpu_to_le16(qp
->icid
);
1823 qp
->urq
.db_rec_db2_data
.data
.value
=
1824 cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD
);
1827 rc
= qedr_db_recovery_add(dev
, qp
->usq
.db_addr
,
1828 &qp
->usq
.db_rec_data
->db_data
,
1834 rc
= qedr_db_recovery_add(dev
, qp
->urq
.db_addr
,
1835 &qp
->urq
.db_rec_data
->db_data
,
1841 if (rdma_protocol_iwarp(&dev
->ibdev
, 1)) {
1842 rc
= qedr_db_recovery_add(dev
, qp
->urq
.db_rec_db2_addr
,
1843 &qp
->urq
.db_rec_db2_data
,
1849 qedr_qp_user_print(dev
, qp
);
1853 rc
= dev
->ops
->rdma_destroy_qp(dev
->rdma_ctx
, qp
->qed_qp
);
1855 DP_ERR(dev
, "create qp: fatal fault. rc=%d", rc
);
1858 qedr_cleanup_user(dev
, ctx
, qp
);
1862 static int qedr_set_iwarp_db_info(struct qedr_dev
*dev
, struct qedr_qp
*qp
)
1866 qp
->sq
.db
= dev
->db_addr
+
1867 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD
);
1868 qp
->sq
.db_data
.data
.icid
= qp
->icid
;
1870 rc
= qedr_db_recovery_add(dev
, qp
->sq
.db
,
1877 qp
->rq
.db
= dev
->db_addr
+
1878 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD
);
1879 qp
->rq
.db_data
.data
.icid
= qp
->icid
;
1880 qp
->rq
.iwarp_db2
= dev
->db_addr
+
1881 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS
);
1882 qp
->rq
.iwarp_db2_data
.data
.icid
= qp
->icid
;
1883 qp
->rq
.iwarp_db2_data
.data
.value
= DQ_TCM_IWARP_POST_RQ_CF_CMD
;
1885 rc
= qedr_db_recovery_add(dev
, qp
->rq
.db
,
1892 rc
= qedr_db_recovery_add(dev
, qp
->rq
.iwarp_db2
,
1893 &qp
->rq
.iwarp_db2_data
,
1900 qedr_roce_create_kernel_qp(struct qedr_dev
*dev
,
1902 struct qed_rdma_create_qp_in_params
*in_params
,
1903 u32 n_sq_elems
, u32 n_rq_elems
)
1905 struct qed_rdma_create_qp_out_params out_params
;
1908 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
1909 QED_CHAIN_USE_TO_PRODUCE
,
1911 QED_CHAIN_CNT_TYPE_U32
,
1913 QEDR_SQE_ELEMENT_SIZE
,
1919 in_params
->sq_num_pages
= qed_chain_get_page_cnt(&qp
->sq
.pbl
);
1920 in_params
->sq_pbl_ptr
= qed_chain_get_pbl_phys(&qp
->sq
.pbl
);
1922 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
1923 QED_CHAIN_USE_TO_CONSUME_PRODUCE
,
1925 QED_CHAIN_CNT_TYPE_U32
,
1927 QEDR_RQE_ELEMENT_SIZE
,
1932 in_params
->rq_num_pages
= qed_chain_get_page_cnt(&qp
->rq
.pbl
);
1933 in_params
->rq_pbl_ptr
= qed_chain_get_pbl_phys(&qp
->rq
.pbl
);
1935 qp
->qed_qp
= dev
->ops
->rdma_create_qp(dev
->rdma_ctx
,
1936 in_params
, &out_params
);
1941 qp
->qp_id
= out_params
.qp_id
;
1942 qp
->icid
= out_params
.icid
;
1944 return qedr_set_roce_db_info(dev
, qp
);
1948 qedr_iwarp_create_kernel_qp(struct qedr_dev
*dev
,
1950 struct qed_rdma_create_qp_in_params
*in_params
,
1951 u32 n_sq_elems
, u32 n_rq_elems
)
1953 struct qed_rdma_create_qp_out_params out_params
;
1954 struct qed_chain_ext_pbl ext_pbl
;
1957 in_params
->sq_num_pages
= QED_CHAIN_PAGE_CNT(n_sq_elems
,
1958 QEDR_SQE_ELEMENT_SIZE
,
1959 QED_CHAIN_MODE_PBL
);
1960 in_params
->rq_num_pages
= QED_CHAIN_PAGE_CNT(n_rq_elems
,
1961 QEDR_RQE_ELEMENT_SIZE
,
1962 QED_CHAIN_MODE_PBL
);
1964 qp
->qed_qp
= dev
->ops
->rdma_create_qp(dev
->rdma_ctx
,
1965 in_params
, &out_params
);
1970 /* Now we allocate the chain */
1971 ext_pbl
.p_pbl_virt
= out_params
.sq_pbl_virt
;
1972 ext_pbl
.p_pbl_phys
= out_params
.sq_pbl_phys
;
1974 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
1975 QED_CHAIN_USE_TO_PRODUCE
,
1977 QED_CHAIN_CNT_TYPE_U32
,
1979 QEDR_SQE_ELEMENT_SIZE
,
1980 &qp
->sq
.pbl
, &ext_pbl
);
1985 ext_pbl
.p_pbl_virt
= out_params
.rq_pbl_virt
;
1986 ext_pbl
.p_pbl_phys
= out_params
.rq_pbl_phys
;
1988 rc
= dev
->ops
->common
->chain_alloc(dev
->cdev
,
1989 QED_CHAIN_USE_TO_CONSUME_PRODUCE
,
1991 QED_CHAIN_CNT_TYPE_U32
,
1993 QEDR_RQE_ELEMENT_SIZE
,
1994 &qp
->rq
.pbl
, &ext_pbl
);
1999 qp
->qp_id
= out_params
.qp_id
;
2000 qp
->icid
= out_params
.icid
;
2002 return qedr_set_iwarp_db_info(dev
, qp
);
2005 dev
->ops
->rdma_destroy_qp(dev
->rdma_ctx
, qp
->qed_qp
);
2010 static void qedr_cleanup_kernel(struct qedr_dev
*dev
, struct qedr_qp
*qp
)
2012 dev
->ops
->common
->chain_free(dev
->cdev
, &qp
->sq
.pbl
);
2013 kfree(qp
->wqe_wr_id
);
2015 dev
->ops
->common
->chain_free(dev
->cdev
, &qp
->rq
.pbl
);
2016 kfree(qp
->rqe_wr_id
);
2018 /* GSI qp is not registered to db mechanism so no need to delete */
2019 if (qp
->qp_type
== IB_QPT_GSI
)
2022 qedr_db_recovery_del(dev
, qp
->sq
.db
, &qp
->sq
.db_data
);
2025 qedr_db_recovery_del(dev
, qp
->rq
.db
, &qp
->rq
.db_data
);
2027 if (rdma_protocol_iwarp(&dev
->ibdev
, 1))
2028 qedr_db_recovery_del(dev
, qp
->rq
.iwarp_db2
,
2029 &qp
->rq
.iwarp_db2_data
);
2033 static int qedr_create_kernel_qp(struct qedr_dev
*dev
,
2036 struct ib_qp_init_attr
*attrs
)
2038 struct qed_rdma_create_qp_in_params in_params
;
2039 struct qedr_pd
*pd
= get_qedr_pd(ibpd
);
2045 memset(&in_params
, 0, sizeof(in_params
));
2046 qp
->create_type
= QEDR_QP_CREATE_KERNEL
;
	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 * We should add an extra WR since the prod and cons indices of
	 * wqe_wr_id are managed in such a way that the WQ is considered full
	 * when (prod+1)%max_wr==cons. We currently don't do that because we
	 * double the number of entries due to an iSER issue that pushes far more
	 * WRs than indicated. If we decline its ib_post_send() then we get
	 * error prints in the dmesg we'd like to avoid.
	 */
2058 qp
->sq
.max_wr
= min_t(u32
, attrs
->cap
.max_send_wr
* dev
->wq_multiplier
,
2061 qp
->wqe_wr_id
= kcalloc(qp
->sq
.max_wr
, sizeof(*qp
->wqe_wr_id
),
2063 if (!qp
->wqe_wr_id
) {
2064 DP_ERR(dev
, "create qp: failed SQ shadow memory allocation\n");
2068 /* QP handle to be written in CQE */
2069 in_params
.qp_handle_lo
= lower_32_bits((uintptr_t) qp
);
2070 in_params
.qp_handle_hi
= upper_32_bits((uintptr_t) qp
);
	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
	 * the ring. The ring should allow at least a single WR, even if the
	 * user requested none, due to allocation issues.
	 */
2076 qp
->rq
.max_wr
= (u16
) max_t(u32
, attrs
->cap
.max_recv_wr
, 1);
2078 /* Allocate driver internal RQ array */
2079 qp
->rqe_wr_id
= kcalloc(qp
->rq
.max_wr
, sizeof(*qp
->rqe_wr_id
),
2081 if (!qp
->rqe_wr_id
) {
2083 "create qp: failed RQ shadow memory allocation\n");
2084 kfree(qp
->wqe_wr_id
);
2088 qedr_init_common_qp_in_params(dev
, pd
, qp
, attrs
, true, &in_params
);
2090 n_sq_entries
= attrs
->cap
.max_send_wr
;
2091 n_sq_entries
= min_t(u32
, n_sq_entries
, dev
->attr
.max_sqe
);
2092 n_sq_entries
= max_t(u32
, n_sq_entries
, 1);
2093 n_sq_elems
= n_sq_entries
* QEDR_MAX_SQE_ELEMENTS_PER_SQE
;
2095 n_rq_elems
= qp
->rq
.max_wr
* QEDR_MAX_RQE_ELEMENTS_PER_RQE
;
2097 if (rdma_protocol_iwarp(&dev
->ibdev
, 1))
2098 rc
= qedr_iwarp_create_kernel_qp(dev
, qp
, &in_params
,
2099 n_sq_elems
, n_rq_elems
);
2101 rc
= qedr_roce_create_kernel_qp(dev
, qp
, &in_params
,
2102 n_sq_elems
, n_rq_elems
);
2104 qedr_cleanup_kernel(dev
, qp
);
2109 struct ib_qp
*qedr_create_qp(struct ib_pd
*ibpd
,
2110 struct ib_qp_init_attr
*attrs
,
2111 struct ib_udata
*udata
)
2113 struct qedr_dev
*dev
= get_qedr_dev(ibpd
->device
);
2114 struct qedr_pd
*pd
= get_qedr_pd(ibpd
);
2119 DP_DEBUG(dev
, QEDR_MSG_QP
, "create qp: called from %s, pd=%p\n",
2120 udata
? "user library" : "kernel", pd
);
2122 rc
= qedr_check_qp_attrs(ibpd
, dev
, attrs
, udata
);
2126 DP_DEBUG(dev
, QEDR_MSG_QP
,
2127 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
2128 udata
? "user library" : "kernel", attrs
->event_handler
, pd
,
2129 get_qedr_cq(attrs
->send_cq
),
2130 get_qedr_cq(attrs
->send_cq
)->icid
,
2131 get_qedr_cq(attrs
->recv_cq
),
2132 attrs
->recv_cq
? get_qedr_cq(attrs
->recv_cq
)->icid
: 0);
2134 qp
= kzalloc(sizeof(*qp
), GFP_KERNEL
);
2136 DP_ERR(dev
, "create qp: failed allocating memory\n");
2137 return ERR_PTR(-ENOMEM
);
2140 qedr_set_common_qp_params(dev
, qp
, pd
, attrs
);
2142 if (attrs
->qp_type
== IB_QPT_GSI
) {
2143 ibqp
= qedr_create_gsi_qp(dev
, attrs
, qp
);
2150 rc
= qedr_create_user_qp(dev
, qp
, ibpd
, udata
, attrs
);
2152 rc
= qedr_create_kernel_qp(dev
, qp
, ibpd
, attrs
);
2157 qp
->ibqp
.qp_num
= qp
->qp_id
;
2159 if (rdma_protocol_iwarp(&dev
->ibdev
, 1)) {
2160 rc
= xa_insert(&dev
->qps
, qp
->qp_id
, qp
, GFP_KERNEL
);
2170 return ERR_PTR(-EFAULT
);
2173 static enum ib_qp_state
qedr_get_ibqp_state(enum qed_roce_qp_state qp_state
)
2176 case QED_ROCE_QP_STATE_RESET
:
2177 return IB_QPS_RESET
;
2178 case QED_ROCE_QP_STATE_INIT
:
2180 case QED_ROCE_QP_STATE_RTR
:
2182 case QED_ROCE_QP_STATE_RTS
:
2184 case QED_ROCE_QP_STATE_SQD
:
2186 case QED_ROCE_QP_STATE_ERR
:
2188 case QED_ROCE_QP_STATE_SQE
:
2194 static enum qed_roce_qp_state
qedr_get_state_from_ibqp(
2195 enum ib_qp_state qp_state
)
2199 return QED_ROCE_QP_STATE_RESET
;
2201 return QED_ROCE_QP_STATE_INIT
;
2203 return QED_ROCE_QP_STATE_RTR
;
2205 return QED_ROCE_QP_STATE_RTS
;
2207 return QED_ROCE_QP_STATE_SQD
;
2209 return QED_ROCE_QP_STATE_ERR
;
2211 return QED_ROCE_QP_STATE_ERR
;
2215 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info
*qph
)
2217 qed_chain_reset(&qph
->pbl
);
2221 qph
->db_data
.data
.value
= cpu_to_le16(0);
static int qedr_update_qp_state(struct qedr_dev *dev,
				struct qedr_qp *qp,
				enum qed_roce_qp_state cur_state,
				enum qed_roce_qp_state new_state)
{
	int status = 0;

	if (new_state == cur_state)
		return 0;

	switch (cur_state) {
	case QED_ROCE_QP_STATE_RESET:
		switch (new_state) {
		case QED_ROCE_QP_STATE_INIT:
			qp->prev_wqe_size = 0;
			qedr_reset_qp_hwq_info(&qp->sq);
			qedr_reset_qp_hwq_info(&qp->rq);
			break;
		default:
			status = -EINVAL;
			break;
		}
		break;
	case QED_ROCE_QP_STATE_INIT:
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTR:
			/* Update doorbell (in case post_recv was
			 * done before move to RTR)
			 */
			if (rdma_protocol_roce(&dev->ibdev, 1)) {
				writel(qp->rq.db_data.raw, qp->rq.db);
			}
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		}
		break;
	case QED_ROCE_QP_STATE_RTR:
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTS:
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		}
		break;
	case QED_ROCE_QP_STATE_RTS:
		switch (new_state) {
		case QED_ROCE_QP_STATE_SQD:
			break;
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		}
		break;
	case QED_ROCE_QP_STATE_SQD:
		switch (new_state) {
		case QED_ROCE_QP_STATE_RTS:
		case QED_ROCE_QP_STATE_ERR:
			break;
		default:
			/* Invalid state change. */
			status = -EINVAL;
			break;
		}
		break;
	case QED_ROCE_QP_STATE_ERR:
		switch (new_state) {
		case QED_ROCE_QP_STATE_RESET:
			if ((qp->rq.prod != qp->rq.cons) ||
			    (qp->sq.prod != qp->sq.cons)) {
				DP_NOTICE(dev,
					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
					  qp->sq.cons);
				status = -EINVAL;
			}
			break;
		default:
			status = -EINVAL;
			break;
		}
		break;
	default:
		status = -EINVAL;
		break;
	}

	return status;
}
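
/* A rough sketch of the transitions the switch above accepts (assumed from
 * the checks, not an authoritative state table): RESET -> INIT -> RTR -> RTS
 * is the normal bring-up path, SQD is reachable from RTS, and ERR is
 * reachable from INIT/RTR/RTS/SQD, while ERR -> RESET additionally requires
 * both work queues to be empty.  qedr_get_state_from_ibqp() maps each
 * IB_QPS_* value requested through ib_modify_qp() onto the
 * QED_ROCE_QP_STATE_* values validated here.
 */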
int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		   int attr_mask, struct ib_udata *udata)
{
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	enum ib_qp_state old_qp_state, new_qp_state;
	enum qed_roce_qp_state cur_state;
	int rc = 0;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
		 attr->qp_state);

	old_qp_state = qedr_get_ibqp_state(qp->state);
	if (attr_mask & IB_QP_STATE)
		new_qp_state = attr->qp_state;
	else
		new_qp_state = old_qp_state;

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
					ibqp->qp_type, attr_mask)) {
			DP_ERR(dev,
			       "modify qp: invalid attribute mask=0x%x specified for\n"
			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
			       attr_mask, qp->qp_id, ibqp->qp_type,
			       old_qp_state, new_qp_state);
			rc = -EINVAL;
			goto err;
		}
	}

	/* Translate the masks... */
	if (attr_mask & IB_QP_STATE) {
		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
	}

	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
		qp_params.sqd_async = true;

	if (attr_mask & IB_QP_PKEY_INDEX) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
			rc = -EINVAL;
			goto err;
		}

		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (attr_mask & IB_QP_ACCESS_FLAGS) {
		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
						  IB_ACCESS_REMOTE_READ;
		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
						   IB_ACCESS_REMOTE_WRITE;
		qp_params.incoming_atomic_en = attr->qp_access_flags &
					       IB_ACCESS_REMOTE_ATOMIC;
	}

	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
		if (rdma_protocol_iwarp(&dev->ibdev, 1))
			return -EINVAL;

		if (attr_mask & IB_QP_PATH_MTU) {
			if (attr->path_mtu < IB_MTU_256 ||
			    attr->path_mtu > IB_MTU_4096) {
				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
				rc = -EINVAL;
				goto err;
			}
			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
				      ib_mtu_enum_to_int(iboe_get_mtu
							 (dev->ndev->mtu)));
		}

		if (!qp->mtu) {
			qp->mtu =
			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);

		qp_params.traffic_class_tos = grh->traffic_class;
		qp_params.flow_label = grh->flow_label;
		qp_params.hop_limit_ttl = grh->hop_limit;

		qp->sgid_idx = grh->sgid_index;

		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
		if (rc) {
			DP_ERR(dev,
			       "modify qp: problems with GID index %d (rc=%d)\n",
			       grh->sgid_index, rc);
			return rc;
		}

		rc = qedr_get_dmac(dev, &attr->ah_attr,
				   qp_params.remote_mac_addr);
		if (rc)
			return rc;

		qp_params.use_local_mac = true;
		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);

		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
			 qp_params.remote_mac_addr);

		qp_params.mtu = qp->mtu;
		qp_params.lb_indication = false;
	}

	if (!qp_params.mtu) {
		/* Stay with current MTU */
		if (qp->mtu)
			qp_params.mtu = qp->mtu;
		else
			qp_params.mtu =
			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);

		/* The received timeout value is an exponent used like this:
		 *    "12.7.34 LOCAL ACK TIMEOUT
		 *    Value representing the transport (ACK) timeout for use by
		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
		 * The FW expects timeout in msec so we need to divide the usec
		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
		 * The value of zero means infinite so we use a 'max_t' to make
		 * sure that sub 1 msec values will be configured as 1 msec.
		 */
		if (attr->timeout)
			qp_params.ack_timeout =
					1 << max_t(int, attr->timeout - 8, 0);
		else
			qp_params.ack_timeout = 0;
	}

	if (attr_mask & IB_QP_RETRY_CNT) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
		qp_params.retry_cnt = attr->retry_cnt;
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
		qp_params.rnr_retry_cnt = attr->rnr_retry;
	}

	if (attr_mask & IB_QP_RQ_PSN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
		qp_params.rq_psn = attr->rq_psn;
		qp->rq_psn = attr->rq_psn;
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
			rc = -EINVAL;
			DP_ERR(dev,
			       "unsupported max_rd_atomic=%d, supported=%d\n",
			       attr->max_rd_atomic,
			       dev->attr.max_qp_req_rd_atomic_resc);
			goto err;
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
	}

	if (attr_mask & IB_QP_SQ_PSN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
		qp_params.sq_psn = attr->sq_psn;
		qp->sq_psn = attr->sq_psn;
	}

	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		if (attr->max_dest_rd_atomic >
		    dev->attr.max_qp_resp_rd_atomic_resc) {
			DP_ERR(dev,
			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
			       attr->max_dest_rd_atomic,
			       dev->attr.max_qp_resp_rd_atomic_resc);

			rc = -EINVAL;
			goto err;
		}

		SET_FIELD(qp_params.modify_flags,
			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
	}

	if (attr_mask & IB_QP_DEST_QPN) {
		SET_FIELD(qp_params.modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);

		qp_params.dest_qp = attr->dest_qp_num;
		qp->dest_qp_num = attr->dest_qp_num;
	}

	cur_state = qp->state;

	/* Update the QP state before the actual ramrod to prevent a race with
	 * fast path. Modifying the QP state to error will cause the device to
	 * flush the CQEs, and polling those flushed CQEs will be treated as a
	 * potential issue if the QP isn't in the error state.
	 */
	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
		qp->state = QED_ROCE_QP_STATE_ERR;

	if (qp->qp_type != IB_QPT_GSI)
		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
					      qp->qed_qp, &qp_params);

	if (attr_mask & IB_QP_STATE) {
		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
			rc = qedr_update_qp_state(dev, qp, cur_state,
						  qp_params.new_state);
		qp->state = qp_params.new_state;
	}

err:
	return rc;
}
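
/* Worked example for the IB_QP_TIMEOUT translation above (illustrative):
 * attr->timeout = 14 means 4.096 usec * 2^14 ~= 67 msec on the wire; the
 * 2^(timeout - 8) approximation programs 1 << (14 - 8) = 64 msec into
 * qp_params.ack_timeout.  attr->timeout = 0 is passed through as 0, i.e. an
 * infinite ACK timeout, and exponents of 8 or less clamp to 1 msec.
 */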
static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
{
	int ib_qp_acc_flags = 0;

	if (params->incoming_rdma_write_en)
		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
	if (params->incoming_rdma_read_en)
		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
	if (params->incoming_atomic_en)
		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
	return ib_qp_acc_flags;
}

int qedr_query_qp(struct ib_qp *ibqp,
		  struct ib_qp_attr *qp_attr,
		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	struct qed_rdma_query_qp_out_params params;
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qedr_dev *dev = qp->dev;
	int rc = 0;

	memset(&params, 0, sizeof(params));

	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
	if (rc)
		goto err;

	memset(qp_attr, 0, sizeof(*qp_attr));
	memset(qp_init_attr, 0, sizeof(*qp_init_attr));

	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
	qp_attr->path_mig_state = IB_MIG_MIGRATED;
	qp_attr->rq_psn = params.rq_psn;
	qp_attr->sq_psn = params.sq_psn;
	qp_attr->dest_qp_num = params.dest_qp;

	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);

	qp_attr->cap.max_send_wr = qp->sq.max_wr;
	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
	qp_attr->cap.max_send_sge = qp->sq.max_sges;
	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
	qp_init_attr->cap = qp_attr->cap;

	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
			params.flow_label, qp->sgid_idx,
			params.hop_limit_ttl, params.traffic_class_tos);
	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
	qp_attr->timeout = params.timeout;
	qp_attr->rnr_retry = params.rnr_retry;
	qp_attr->retry_cnt = params.retry_cnt;
	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
	qp_attr->pkey_index = params.pkey_index;
	qp_attr->port_num = 1;
	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
	qp_attr->alt_pkey_index = 0;
	qp_attr->alt_port_num = 0;
	qp_attr->alt_timeout = 0;
	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));

	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
	qp_attr->max_rd_atomic = params.max_rd_atomic;
	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;

	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
		 qp_attr->cap.max_inline_data);

err:
	return rc;
}
static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
				  struct ib_udata *udata)
{
	struct qedr_ucontext *ctx =
		rdma_udata_to_drv_context(udata, struct qedr_ucontext,
					  ibucontext);
	int rc;

	if (qp->qp_type != IB_QPT_GSI) {
		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
		if (rc)
			return rc;
	}

	if (qp->create_type == QEDR_QP_CREATE_USER)
		qedr_cleanup_user(dev, ctx, qp);
	else
		qedr_cleanup_kernel(dev, qp);

	return 0;
}

int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qedr_dev *dev = qp->dev;
	struct ib_qp_attr attr;
	int attr_mask = 0;

	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
		 qp, qp->qp_type);

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
		    (qp->state != QED_ROCE_QP_STATE_INIT)) {

			attr.qp_state = IB_QPS_ERR;
			attr_mask |= IB_QP_STATE;

			/* Change the QP state to ERROR */
			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
		}
	} else {
		/* If connection establishment started the WAIT_FOR_CONNECT
		 * bit will be on and we need to wait for the establishment
		 * to complete before destroying the qp.
		 */
		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
				     &qp->iwarp_cm_flags))
			wait_for_completion(&qp->iwarp_cm_comp);

		/* If graceful disconnect started, the WAIT_FOR_DISCONNECT
		 * bit will be on, and we need to wait for the disconnect to
		 * complete before continuing. We can use the same completion,
		 * iwarp_cm_comp, since this is the only place that waits for
		 * this completion and it is sequential. In addition,
		 * disconnect can't occur before the connection is fully
		 * established, therefore if WAIT_FOR_DISCONNECT is on it
		 * means WAIT_FOR_CONNECT is also on and the completion for
		 * CONNECT already occurred.
		 */
		if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
				     &qp->iwarp_cm_flags))
			wait_for_completion(&qp->iwarp_cm_comp);
	}

	if (qp->qp_type == IB_QPT_GSI)
		qedr_destroy_gsi_qp(dev);

	/* We need to remove the entry from the xarray before we release the
	 * qp_id to avoid a race of the qp_id being reallocated and failing
	 * on xa_insert
	 */
	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		xa_erase(&dev->qps, qp->qp_id);

	qedr_free_qp_resources(dev, qp, udata);

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		qedr_iw_qp_rem_ref(&qp->ibqp);

	return 0;
}
int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
		   struct ib_udata *udata)
{
	struct qedr_ah *ah = get_qedr_ah(ibah);

	rdma_copy_ah_attr(&ah->attr, attr);

	return 0;
}

void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct qedr_ah *ah = get_qedr_ah(ibah);

	rdma_destroy_ah_attr(&ah->attr);
}

static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
{
	struct qedr_pbl *pbl, *tmp;

	if (info->pbl_table)
		list_add_tail(&info->pbl_table->list_entry,
			      &info->free_pbl_list);

	if (!list_empty(&info->inuse_pbl_list))
		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);

	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
		list_del(&pbl->list_entry);
		qedr_free_pbl(dev, &info->pbl_info, pbl);
	}
}
static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
			size_t page_list_len, bool two_layered)
{
	struct qedr_pbl *tmp;
	int rc;

	INIT_LIST_HEAD(&info->free_pbl_list);
	INIT_LIST_HEAD(&info->inuse_pbl_list);

	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
				  page_list_len, two_layered);
	if (rc)
		goto done;

	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
	if (IS_ERR(info->pbl_table)) {
		rc = PTR_ERR(info->pbl_table);
		goto done;
	}

	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
		 &info->pbl_table->pa);

	/* in usual case we use 2 PBLs, so we add one to free
	 * list and allocating another one
	 */
	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
	if (IS_ERR(tmp)) {
		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
		goto done;
	}

	list_add_tail(&tmp->list_entry, &info->free_pbl_list);

	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);

done:
	if (rc)
		free_mr_info(dev, info);

	return rc;
}
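
/* Usage sketch for init_mr_info() (assumed from the callers below, e.g.
 * qedr_reg_user_mr()): the table returned in info->pbl_table is the one
 * programmed into the HW via hw_mr.pbl_ptr, while the extra table allocated
 * above is parked on free_pbl_list so handle_completed_mrs() can later
 * recycle invalidated tables without allocating in the fast path; failure
 * to allocate the spare is deliberately non-fatal.
 */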
struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
			       u64 usr_addr, int acc, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_mr *mr;
	struct qedr_pd *pd;
	int rc = -ENOMEM;

	pd = get_qedr_pd(ibpd);
	DP_DEBUG(dev, QEDR_MSG_MR,
		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
		 pd->pd_id, start, len, usr_addr, acc);

	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(rc);

	mr->type = QEDR_MR_USER;

	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
	if (IS_ERR(mr->umem)) {
		rc = -EFAULT;
		goto err0;
	}

	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
	if (rc)
		goto err1;

	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
			   &mr->info.pbl_info, PAGE_SHIFT);

	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
	if (rc) {
		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
		goto err1;
	}

	/* Index only, 18 bit long, lkey = itid << 8 | key */
	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
	mr->hw_mr.key = 0;
	mr->hw_mr.pd = pd->pd_id;
	mr->hw_mr.local_read = 1;
	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
	mr->hw_mr.mw_bind = false;
	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
	mr->hw_mr.page_size_log = PAGE_SHIFT;
	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
	mr->hw_mr.length = len;
	mr->hw_mr.vaddr = usr_addr;
	mr->hw_mr.zbva = false;
	mr->hw_mr.phy_mr = false;
	mr->hw_mr.dma_mr = false;

	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
	if (rc) {
		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
		goto err2;
	}

	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
	    mr->hw_mr.remote_atomic)
		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;

	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
		 mr->ibmr.lkey);
	return &mr->ibmr;

err2:
	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
err1:
	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
err0:
	kfree(mr);
	return ERR_PTR(rc);
}
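
/* Worked example of the key composition used above (illustrative values):
 * with hw_mr.itid = 0x123 and hw_mr.key = 0, lkey = (0x123 << 8) | 0 =
 * 0x12300.  The low 8 bits carry the key and the upper bits carry the
 * 18-bit TID index handed out by rdma_alloc_tid(); rkey is only published
 * when one of the remote access flags was requested.
 */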
int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
	struct qedr_mr *mr = get_qedr_mr(ib_mr);
	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
	int rc = 0;

	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
	if (rc)
		return rc;

	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);

	if (mr->type != QEDR_MR_DMA)
		free_mr_info(dev, &mr->info);

	/* it could be user registered memory. */
	ib_umem_release(mr->umem);

	kfree(mr);

	return rc;
}
static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
				       int max_page_list_len)
{
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_mr *mr;
	int rc = -ENOMEM;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
		 max_page_list_len);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(rc);

	mr->dev = dev;
	mr->type = QEDR_MR_FRMR;

	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
	if (rc)
		goto err0;

	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
	if (rc) {
		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
		goto err0;
	}

	/* Index only, 18 bit long, lkey = itid << 8 | key */
	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
	mr->hw_mr.key = 0;
	mr->hw_mr.pd = pd->pd_id;
	mr->hw_mr.local_read = 1;
	mr->hw_mr.local_write = 0;
	mr->hw_mr.remote_read = 0;
	mr->hw_mr.remote_write = 0;
	mr->hw_mr.remote_atomic = 0;
	mr->hw_mr.mw_bind = false;
	mr->hw_mr.pbl_ptr = 0;
	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
	mr->hw_mr.fbo = 0;
	mr->hw_mr.length = 0;
	mr->hw_mr.vaddr = 0;
	mr->hw_mr.zbva = false;
	mr->hw_mr.phy_mr = true;
	mr->hw_mr.dma_mr = false;

	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
	if (rc) {
		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
		goto err1;
	}

	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
	mr->ibmr.rkey = mr->ibmr.lkey;

	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
	return mr;

err1:
	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
err0:
	kfree(mr);
	return ERR_PTR(rc);
}

struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
			    u32 max_num_sg, struct ib_udata *udata)
{
	struct qedr_mr *mr;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = __qedr_alloc_mr(ibpd, max_num_sg);

	if (IS_ERR(mr))
		return ERR_PTR(-EINVAL);

	return &mr->ibmr;
}
static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct qedr_mr *mr = get_qedr_mr(ibmr);
	struct qedr_pbl *pbl_table;
	struct regpair *pbe;
	u32 pbes_in_page;

	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
		return -ENOMEM;
	}

	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
		 mr->npages, addr);

	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
	pbe = (struct regpair *)pbl_table->va;
	pbe += mr->npages % pbes_in_page;
	pbe->lo = cpu_to_le32((u32)addr);
	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));

	mr->npages++;

	return 0;
}

static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
{
	int work = info->completed - info->completed_handled - 1;

	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
		struct qedr_pbl *pbl;

		/* Free all the page list that are possible to be freed
		 * (all the ones that were invalidated), under the assumption
		 * that if an FMR was completed successfully that means that
		 * if there was an invalidate operation before it also ended
		 */
		pbl = list_first_entry(&info->inuse_pbl_list,
				       struct qedr_pbl, list_entry);
		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
		info->completed_handled++;
	}
}

int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
		   int sg_nents, unsigned int *sg_offset)
{
	struct qedr_mr *mr = get_qedr_mr(ibmr);

	mr->npages = 0;

	handle_completed_mrs(mr->dev, &mr->info);
	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
}
struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct qedr_mr *mr;
	int rc;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = QEDR_MR_DMA;

	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
	if (rc) {
		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
		goto err1;
	}

	/* index only, 18 bit long, lkey = itid << 8 | key */
	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
	mr->hw_mr.pd = pd->pd_id;
	mr->hw_mr.local_read = 1;
	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
	mr->hw_mr.dma_mr = true;

	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
	if (rc) {
		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
		goto err2;
	}

	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
	    mr->hw_mr.remote_atomic)
		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;

	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
	return &mr->ibmr;

err2:
	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
err1:
	kfree(mr);
	return ERR_PTR(rc);
}
static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
{
	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
}

static int sge_data_len(struct ib_sge *sg_list, int num_sge)
{
	int i, len = 0;

	for (i = 0; i < num_sge; i++)
		len += sg_list[i].length;

	return len;
}

static void swap_wqe_data64(u64 *p)
{
	int i;

	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
		*p = cpu_to_be64(cpu_to_le64(*p));
}
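
/* Ring accounting sketch for qedr_wq_is_full() above: with max_wr = 4,
 * prod = 3 and cons = 0, ((3 + 1) % 4) == 0 == cons, so the queue reports
 * full.  One slot is always left unused, which is the usual way to tell a
 * full ring from an empty one when only prod/cons indices are tracked.
 */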
static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
				       struct qedr_qp *qp, u8 *wqe_size,
				       const struct ib_send_wr *wr,
				       const struct ib_send_wr **bad_wr,
				       u8 *bits, u8 bit)
{
	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
	char *seg_prt, *wqe;
	int i, seg_siz;

	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
		*bad_wr = wr;
		return 0;
	}

	if (!data_size)
		return data_size;

	*bits |= bit;

	seg_prt = NULL;
	wqe = NULL;
	seg_siz = 0;

	/* Copy data inline */
	for (i = 0; i < wr->num_sge; i++) {
		u32 len = wr->sg_list[i].length;
		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;

		while (len > 0) {
			u32 cur;

			/* New segment required */
			if (!seg_siz) {
				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
				seg_prt = wqe;
				seg_siz = sizeof(struct rdma_sq_common_wqe);
				(*wqe_size)++;
			}

			/* Calculate currently allowed length */
			cur = min_t(u32, len, seg_siz);
			memcpy(seg_prt, src, cur);

			/* Update segment variables */
			seg_prt += cur;
			seg_siz -= cur;

			/* Update sge variables */
			src += cur;
			len -= cur;

			/* Swap fully-completed segments */
			if (!seg_siz)
				swap_wqe_data64((u64 *)wqe);
		}
	}

	/* swap last not completed segment */
	if (seg_siz)
		swap_wqe_data64((u64 *)wqe);

	return data_size;
}

#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE(sge->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)

#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE(hdr->wr_id, vwr_id);		\
		(hdr)->num_sges = num_sge;			\
	} while (0)

#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE(sge->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
				const struct ib_send_wr *wr)
{
	u32 data_size = 0;
	int i;

	for (i = 0; i < wr->num_sge; i++) {
		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);

		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
		sge->length = cpu_to_le32(wr->sg_list[i].length);
		data_size += wr->sg_list[i].length;
	}

	if (wqe_size)
		*wqe_size += wr->num_sge;

	return data_size;
}

static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
				     struct qedr_qp *qp,
				     struct rdma_sq_rdma_wqe_1st *rwqe,
				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
				     const struct ib_send_wr *wr,
				     const struct ib_send_wr **bad_wr)
{
	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);

	if (wr->send_flags & IB_SEND_INLINE &&
	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
	     wr->opcode == IB_WR_RDMA_WRITE)) {
		u8 flags = 0;

		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
						   bad_wr, &rwqe->flags, flags);
	}

	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
}

static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
				     struct qedr_qp *qp,
				     struct rdma_sq_send_wqe_1st *swqe,
				     struct rdma_sq_send_wqe_2st *swqe2,
				     const struct ib_send_wr *wr,
				     const struct ib_send_wr **bad_wr)
{
	memset(swqe2, 0, sizeof(*swqe2));
	if (wr->send_flags & IB_SEND_INLINE) {
		u8 flags = 0;

		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
						   bad_wr, &swqe->flags, flags);
	}

	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
}
static int qedr_prepare_reg(struct qedr_qp *qp,
			    struct rdma_sq_fmr_wqe_1st *fwqe1,
			    const struct ib_reg_wr *wr)
{
	struct qedr_mr *mr = get_qedr_mr(wr->mr);
	struct rdma_sq_fmr_wqe_2nd *fwqe2;

	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
	fwqe1->l_key = wr->key;

	fwqe2->access_ctrl = 0;

	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
		   !!(wr->access & IB_ACCESS_REMOTE_READ));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
	fwqe2->fmr_ctrl = 0;

	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
		   ilog2(mr->ibmr.page_size) - 12);

	fwqe2->length_hi = 0;
	fwqe2->length_lo = mr->ibmr.length;
	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);

	qp->wqe_wr_id[qp->sq.prod].mr = mr;

	return 0;
}
static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		return IB_WC_RDMA_WRITE;
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_INV:
		return IB_WC_SEND;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		return IB_WC_RDMA_READ;
	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_WC_COMP_SWAP;
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_WC_FETCH_ADD;
	case IB_WR_REG_MR:
		return IB_WC_REG_MR;
	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;
	default:
		return IB_WC_SEND;
	}
}
static inline bool qedr_can_post_send(struct qedr_qp *qp,
				      const struct ib_send_wr *wr)
{
	int wq_is_full, err_wr, pbl_is_full;
	struct qedr_dev *dev = qp->dev;

	/* prevent SQ overflow and/or processing of a bad WR */
	err_wr = wr->num_sge > qp->sq.max_sges;
	wq_is_full = qedr_wq_is_full(&qp->sq);
	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	if (wq_is_full || err_wr || pbl_is_full) {
		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
			DP_ERR(dev,
			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
		}

		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
			DP_ERR(dev,
			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
		}

		if (pbl_is_full &&
		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
			DP_ERR(dev,
			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
		}
		return false;
	}
	return true;
}
static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			    const struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct rdma_sq_atomic_wqe_1st *awqe1;
	struct rdma_sq_atomic_wqe_2nd *awqe2;
	struct rdma_sq_atomic_wqe_3rd *awqe3;
	struct rdma_sq_send_wqe_2st *swqe2;
	struct rdma_sq_local_inv_wqe *iwqe;
	struct rdma_sq_rdma_wqe_2nd *rwqe2;
	struct rdma_sq_send_wqe_1st *swqe;
	struct rdma_sq_rdma_wqe_1st *rwqe;
	struct rdma_sq_fmr_wqe_1st *fwqe1;
	struct rdma_sq_common_wqe *wqe;
	u32 length;
	int rc = 0;
	bool comp;

	if (!qedr_can_post_send(qp, wr)) {
		*bad_wr = wr;
		return -ENOMEM;
	}

	wqe = qed_chain_produce(&qp->sq.pbl);
	qp->wqe_wr_id[qp->sq.prod].signaled =
		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;

	wqe->flags = 0;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
		   !!(wr->send_flags & IB_SEND_SOLICITED));
	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
		   !!(wr->send_flags & IB_SEND_FENCE));
	wqe->prev_wqe_size = qp->prev_wqe_size;

	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);

	switch (wr->opcode) {
	case IB_WR_SEND_WITH_IMM:
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);

		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;

		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND_WITH_INV:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		swqe->wqe_size = 2;
		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_WRITE:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_READ_WITH_INV:
		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
		/* fallthrough: same handling as RDMA READ */

	case IB_WR_RDMA_READ:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
		awqe1->wqe_size = 4;

		awqe2 = qed_chain_produce(&qp->sq.pbl);
		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);

		awqe3 = qed_chain_produce(&qp->sq.pbl);

		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->compare_add);
		} else {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->swap);
			DMA_REGPAIR_LE(awqe3->cmp_data,
				       atomic_wr(wr)->compare_add);
		}

		qedr_prepare_sq_sges(qp, NULL, wr);

		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
		qp->prev_wqe_size = awqe1->wqe_size;
		break;

	case IB_WR_LOCAL_INV:
		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
		iwqe->wqe_size = 1;

		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
		iwqe->inv_l_key = wr->ex.invalidate_rkey;
		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
		qp->prev_wqe_size = iwqe->wqe_size;
		break;
	case IB_WR_REG_MR:
		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
		fwqe1->wqe_size = 2;

		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
		if (rc) {
			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
			*bad_wr = wr;
			break;
		}

		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
		qp->prev_wqe_size = fwqe1->wqe_size;
		break;
	default:
		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
		rc = -EINVAL;
		*bad_wr = wr;
		break;
	}

	if (*bad_wr) {
		u16 value;

		/* Restore prod to its position before
		 * this WR was processed
		 */
		value = le16_to_cpu(qp->sq.db_data.data.value);
		qed_chain_set_prod(&qp->sq.pbl, value, wqe);

		/* Restore prev_wqe_size */
		qp->prev_wqe_size = wqe->prev_wqe_size;
		rc = -EINVAL;
		DP_ERR(dev, "POST SEND FAILED\n");
	}

	return rc;
}
int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		   const struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	unsigned long flags;
	int rc = 0;

	*bad_wr = NULL;

	if (qp->qp_type == IB_QPT_GSI)
		return qedr_gsi_post_send(ibqp, wr, bad_wr);

	spin_lock_irqsave(&qp->q_lock, flags);

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
			spin_unlock_irqrestore(&qp->q_lock, flags);
			*bad_wr = wr;
			DP_DEBUG(dev, QEDR_MSG_CQ,
				 "QP in wrong state! QP icid=0x%x state %d\n",
				 qp->icid, qp->state);
			return -EINVAL;
		}
	}

	while (wr) {
		rc = __qedr_post_send(ibqp, wr, bad_wr);
		if (rc)
			break;

		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;

		qedr_inc_sw_prod(&qp->sq);

		qp->sq.db_data.data.value++;

		wr = wr->next;
	}

	/* Trigger doorbell.
	 * If there was a failure in the first WR then it will be triggered in
	 * vain. However this is not harmful (as long as the producer value is
	 * unchanged). For performance reasons we avoid checking for this
	 * redundant doorbell.
	 *
	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
	 * soon as we give the doorbell, we could get a completion
	 * for this wr, therefore we need to make sure that the
	 * memory is updated before giving the doorbell.
	 * During qedr_poll_cq, rmb is called before accessing the
	 * cqe. This covers for the smp_rmb as well.
	 */
	smp_wmb();
	writel(qp->sq.db_data.raw, qp->sq.db);

	spin_unlock_irqrestore(&qp->q_lock, flags);

	return rc;
}
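
/* Ordering sketch for the doorbell sequence above: smp_wmb() publishes the
 * qp->wqe_wr_id[] bookkeeping and the updated producer before the writel()
 * that rings the doorbell, and it pairs with the rmb() issued in
 * qedr_poll_cq() before a CQE is parsed, so a completion can never be
 * matched against stale wr_id state.
 */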
static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
{
	u32 used;

	/* Calculate number of elements used based on producer
	 * count and consumer count and subtract it from max
	 * work request supported so that we get elements left.
	 */
	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;

	return hw_srq->max_wr - used;
}

int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
		       const struct ib_recv_wr **bad_wr)
{
	struct qedr_srq *srq = get_qedr_srq(ibsrq);
	struct qedr_srq_hwq_info *hw_srq;
	struct qedr_dev *dev = srq->dev;
	struct qed_chain *pbl;
	unsigned long flags;
	int status = 0;
	u32 num_sge;
	u32 offset;

	spin_lock_irqsave(&srq->lock, flags);

	hw_srq = &srq->hw_srq;
	pbl = &srq->hw_srq.pbl;
	while (wr) {
		struct rdma_srq_wqe_header *hdr;
		int i;

		if (!qedr_srq_elem_left(hw_srq) ||
		    wr->num_sge > srq->hw_srq.max_sges) {
			DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
			       wr->num_sge, srq->hw_srq.max_sges);
			status = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		hdr = qed_chain_produce(pbl);
		num_sge = wr->num_sge;
		/* Set number of sge and work request id in header */
		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);

		srq->hw_srq.wr_prod_cnt++;
		hw_srq->wqe_prod++;
		hw_srq->sge_prod++;

		DP_DEBUG(dev, QEDR_MSG_SRQ,
			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);

		for (i = 0; i < wr->num_sge; i++) {
			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);

			/* Set SGE length, lkey and address */
			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
				    wr->sg_list[i].length, wr->sg_list[i].lkey);

			DP_DEBUG(dev, QEDR_MSG_SRQ,
				 "[%d]: len %d key %x addr %x:%x\n",
				 i, srq_sge->length, srq_sge->l_key,
				 srq_sge->addr.hi, srq_sge->addr.lo);
			hw_srq->sge_prod++;
		}

		/* Flush WQE and SGE information before
		 * updating producer.
		 */
		wmb();

		/* SRQ producer is 8 bytes. Need to update SGE producer index
		 * in first 4 bytes and need to update WQE producer in
		 * next 4 bytes.
		 */
		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
		offset = offsetof(struct rdma_srq_producers, wqe_prod);
		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
			hw_srq->wqe_prod;

		/* Flush producer after updating it. */
		wmb();
		wr = wr->next;
	}

	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
		 qed_chain_get_elem_left(pbl));
	spin_unlock_irqrestore(&srq->lock, flags);

	return status;
}
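
/* Producer layout sketch for the update above (assuming the qed HSI
 * definition of struct rdma_srq_producers as two consecutive 32-bit fields,
 * sge_prod followed by wqe_prod): the first store publishes the SGE
 * producer through virt_prod_pair_addr and the offsetof() store then
 * publishes the WQE producer, with wmb() on either side so the firmware
 * never observes a WQE producer that is ahead of its SGEs.
 */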
int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
		   const struct ib_recv_wr **bad_wr)
{
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct qedr_dev *dev = qp->dev;
	unsigned long flags;
	int status = 0;

	if (qp->qp_type == IB_QPT_GSI)
		return qedr_gsi_post_recv(ibqp, wr, bad_wr);

	spin_lock_irqsave(&qp->q_lock, flags);

	if (qp->state == QED_ROCE_QP_STATE_RESET) {
		spin_unlock_irqrestore(&qp->q_lock, flags);
		*bad_wr = wr;
		return -EINVAL;
	}

	while (wr) {
		int i;

		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
		    wr->num_sge > qp->rq.max_sges) {
			DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
			       qp->rq.max_sges);
			status = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		for (i = 0; i < wr->num_sge; i++) {
			u32 flags = 0;
			struct rdma_rq_sge *rqe =
			    qed_chain_produce(&qp->rq.pbl);

			/* First one must include the number
			 * of SGE in the list
			 */
			if (!i)
				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
					  wr->num_sge);

			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
				  wr->sg_list[i].lkey);

			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
				   wr->sg_list[i].length, flags);
		}

		/* Special case of no sges. FW requires between 1-4 sges...
		 * in this case we need to post 1 sge with length zero. this is
		 * because rdma write with immediate consumes an RQ.
		 */
		if (!wr->num_sge) {
			u32 flags = 0;
			struct rdma_rq_sge *rqe =
			    qed_chain_produce(&qp->rq.pbl);

			/* First one must include the number
			 * of SGE in the list
			 */
			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);

			RQ_SGE_SET(rqe, 0, 0, flags);
			i = 1;
		}

		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;

		qedr_inc_sw_prod(&qp->rq);

		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
		 * soon as we give the doorbell, we could get a completion
		 * for this wr, therefore we need to make sure that the
		 * memory is updated before giving the doorbell.
		 * During qedr_poll_cq, rmb is called before accessing the
		 * cqe. This covers for the smp_rmb as well.
		 */
		smp_wmb();

		qp->rq.db_data.data.value++;

		writel(qp->rq.db_data.raw, qp->rq.db);

		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
		}

		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->q_lock, flags);

	return status;
}
static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
{
	struct rdma_cqe_requester *resp_cqe = &cqe->req;

	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
		cq->pbl_toggle;
}

static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
{
	struct rdma_cqe_requester *resp_cqe = &cqe->req;
	struct qedr_qp *qp;

	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
						   resp_cqe->qp_handle.lo,
						   u64);
	return qp;
}

static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
{
	struct rdma_cqe_requester *resp_cqe = &cqe->req;

	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
}

/* Return latest CQE (needs processing) */
static union rdma_cqe *get_cqe(struct qedr_cq *cq)
{
	return cq->latest_cqe;
}

/* For FMR we need to increase the completed-FMR counter used by the
 * algorithm that decides whether a PBL can be freed.  This must happen
 * whether or not the work request was signaled, so this helper is called
 * from the condition that checks whether a WR should be skipped, to make
 * sure we don't miss it (possibly this FMR operation was not signaled).
 */
static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
{
	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
}
static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
		       struct qedr_cq *cq, int num_entries,
		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
		       int force)
{
	u16 cnt = 0;

	while (num_entries && qp->sq.wqe_cons != hw_cons) {
		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
			qedr_chk_if_fmr(qp);
			/* skip WC */
			goto next_cqe;
		}

		/* fill WC */
		wc->status = status;
		wc->vendor_err = 0;
		wc->wc_flags = 0;
		wc->src_qp = qp->id;
		wc->qp = &qp->ibqp;

		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;

		switch (wc->opcode) {
		case IB_WC_RDMA_WRITE:
			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
			break;
		case IB_WC_COMP_SWAP:
		case IB_WC_FETCH_ADD:
			wc->byte_len = 8;
			break;
		case IB_WC_REG_MR:
			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
			break;
		case IB_WC_RDMA_READ:
		case IB_WC_SEND:
			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
			break;
		default:
			break;
		}

		num_entries--;
		wc++;
		cnt++;
next_cqe:
		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
			qed_chain_consume(&qp->sq.pbl);
		qedr_inc_sw_cons(&qp->sq);
	}

	return cnt;
}
static int qedr_poll_cq_req(struct qedr_dev *dev,
			    struct qedr_qp *qp, struct qedr_cq *cq,
			    int num_entries, struct ib_wc *wc,
			    struct rdma_cqe_requester *req)
{
	int cnt = 0;

	switch (req->status) {
	case RDMA_CQE_REQ_STS_OK:
		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
				  IB_WC_SUCCESS, 0);
		break;
	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
		if (qp->state != QED_ROCE_QP_STATE_ERR)
			DP_DEBUG(dev, QEDR_MSG_CQ,
				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				 cq->icid, qp->icid);
		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
				  IB_WC_WR_FLUSH_ERR, 1);
		break;
	default:
		/* process all WQEs before the consumer */
		qp->state = QED_ROCE_QP_STATE_ERR;
		cnt = process_req(dev, qp, cq, num_entries, wc,
				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
		wc += cnt;
		/* if we have extra WC fill it with actual error info */
		if (cnt < num_entries) {
			enum ib_wc_status wc_status;

			switch (req->status) {
			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_BAD_RESP_ERR;
				break;
			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_LOC_LEN_ERR;
				break;
			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_LOC_QP_OP_ERR;
				break;
			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_LOC_PROT_ERR;
				break;
			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_MW_BIND_ERR;
				break;
			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_REM_INV_REQ_ERR;
				break;
			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_REM_ACCESS_ERR;
				break;
			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_REM_OP_ERR;
				break;
			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
				break;
			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
				DP_ERR(dev,
				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_RETRY_EXC_ERR;
				break;
			default:
				DP_ERR(dev,
				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
				       cq->icid, qp->icid);
				wc_status = IB_WC_GENERAL_ERR;
			}
			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
					   wc_status, 1 /* force use of WC */);
		}
	}

	return cnt;
}
static inline int qedr_cqe_resp_status_to_ib(u8 status)
{
	switch (status) {
	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
		return IB_WC_LOC_ACCESS_ERR;
	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
		return IB_WC_LOC_LEN_ERR;
	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
		return IB_WC_LOC_QP_OP_ERR;
	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
		return IB_WC_LOC_PROT_ERR;
	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
		return IB_WC_MW_BIND_ERR;
	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
		return IB_WC_REM_INV_RD_REQ_ERR;
	case RDMA_CQE_RESP_STS_OK:
		return IB_WC_SUCCESS;
	default:
		return IB_WC_GENERAL_ERR;
	}
}

static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
					  struct ib_wc *wc)
{
	wc->status = IB_WC_SUCCESS;
	wc->byte_len = le32_to_cpu(resp->length);

	if (resp->flags & QEDR_RESP_IMM) {
		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
		wc->wc_flags |= IB_WC_WITH_IMM;

		if (resp->flags & QEDR_RESP_RDMA)
			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;

		if (resp->flags & QEDR_RESP_INV)
			return -EINVAL;

	} else if (resp->flags & QEDR_RESP_INV) {
		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
		wc->wc_flags |= IB_WC_WITH_INVALIDATE;

		if (resp->flags & QEDR_RESP_RDMA)
			return -EINVAL;

	} else if (resp->flags & QEDR_RESP_RDMA) {
		return -EINVAL;
	}

	return 0;
}
static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
			       struct qedr_cq *cq, struct ib_wc *wc,
			       struct rdma_cqe_responder *resp, u64 wr_id)
{
	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
	wc->opcode = IB_WC_RECV;
	wc->wc_flags = 0;

	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
		if (qedr_set_ok_cqe_resp_wc(resp, wc))
			DP_ERR(dev,
			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
			       cq, cq->icid, resp->flags);

	} else {
		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
		if (wc->status == IB_WC_GENERAL_ERR)
			DP_ERR(dev,
			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
			       cq, cq->icid, resp->status);
	}

	/* Fill the rest of the WC */
	wc->vendor_err = 0;
	wc->src_qp = qp->id;
	wc->qp = &qp->ibqp;
	wc->wr_id = wr_id;
}

static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				struct qedr_cq *cq, struct ib_wc *wc,
				struct rdma_cqe_responder *resp)
{
	struct qedr_srq *srq = qp->srq;
	u64 wr_id;

	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
			 le32_to_cpu(resp->srq_wr_id.lo), u64);

	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = 0;
		wc->wr_id = wr_id;
		wc->byte_len = 0;
		wc->src_qp = qp->id;
		wc->qp = &qp->ibqp;
	} else {
		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
	}
	srq->hw_srq.wr_cons_cnt++;

	return 1;
}

static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
			    struct qedr_cq *cq, struct ib_wc *wc,
			    struct rdma_cqe_responder *resp)
{
	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;

	__process_resp_one(dev, qp, cq, wc, resp, wr_id);

	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
		qed_chain_consume(&qp->rq.pbl);
	qedr_inc_sw_cons(&qp->rq);

	return 1;
}
static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
			      int num_entries, struct ib_wc *wc, u16 hw_cons)
{
	u16 cnt = 0;

	while (num_entries && qp->rq.wqe_cons != hw_cons) {
		/* fill WC */
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = 0;
		wc->wc_flags = 0;
		wc->src_qp = qp->id;
		wc->byte_len = 0;
		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
		wc->qp = &qp->ibqp;
		num_entries--;
		wc++;
		cnt++;
		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
			qed_chain_consume(&qp->rq.pbl);
		qedr_inc_sw_cons(&qp->rq);
	}

	return cnt;
}

static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
				 struct rdma_cqe_responder *resp, int *update)
{
	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
		consume_cqe(cq);
		*update |= 1;
	}
}

static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				 struct qedr_cq *cq, int num_entries,
				 struct ib_wc *wc,
				 struct rdma_cqe_responder *resp)
{
	int cnt;

	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
	consume_cqe(cq);

	return cnt;
}

static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
			     struct qedr_cq *cq, int num_entries,
			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
			     int *update)
{
	int cnt;

	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
		cnt = process_resp_flush(qp, cq, num_entries, wc,
					 resp->rq_cons_or_srq_id);
		try_consume_resp_cqe(cq, qp, resp, update);
	} else {
		cnt = process_resp_one(dev, qp, cq, wc, resp);
		consume_cqe(cq);
		*update |= 1;
	}

	return cnt;
}

static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
				struct rdma_cqe_requester *req, int *update)
{
	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
		consume_cqe(cq);
		*update |= 1;
	}
}
int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	union rdma_cqe *cqe;
	u32 old_cons, new_cons;
	unsigned long flags;
	int update = 0;
	int done = 0;

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return 0;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return qedr_gsi_poll_cq(ibcq, num_entries, wc);

	spin_lock_irqsave(&cq->cq_lock, flags);
	cqe = cq->latest_cqe;
	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
	while (num_entries && is_valid_cqe(cq, cqe)) {
		struct qedr_qp *qp;
		int cnt = 0;

		/* prevent speculative reads of any field of CQE */
		rmb();

		qp = cqe_get_qp(cqe);
		if (!qp) {
			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
			break;
		}

		wc->qp = &qp->ibqp;

		switch (cqe_get_type(cqe)) {
		case RDMA_CQE_TYPE_REQUESTER:
			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
					       &cqe->req);
			try_consume_req_cqe(cq, qp, &cqe->req, &update);
			break;
		case RDMA_CQE_TYPE_RESPONDER_RQ:
			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
						&cqe->resp, &update);
			break;
		case RDMA_CQE_TYPE_RESPONDER_SRQ:
			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
						    wc, &cqe->resp);
			update = 1;
			break;
		case RDMA_CQE_TYPE_INVALID:
		default:
			DP_ERR(dev, "Error: invalid CQE type = %d\n",
			       cqe_get_type(cqe));
		}
		num_entries -= cnt;
		wc += cnt;
		done += cnt;

		cqe = get_cqe(cq);
	}
	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);

	cq->cq_cons += new_cons - old_cons;

	if (update)
		/* The doorbell notifies about the latest VALID entry,
		 * but the chain already points to the next INVALID one.
		 */
		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, flags);
	return done;
}

int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
		     u8 port_num, const struct ib_wc *in_wc,
		     const struct ib_grh *in_grh, const struct ib_mad *in,
		     struct ib_mad *out_mad, size_t *out_mad_size,
		     u16 *out_mad_pkey_index)
{
	return IB_MAD_RESULT_SUCCESS;
}