Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[cris-mirror.git] / drivers / infiniband / hw / qedr / verbs.c
blob53f00dbf313f757941d32451ae23e62305f9cf53
1 /* QLogic qedr NIC Driver
2 * Copyright (c) 2015-2016 QLogic Corporation
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and /or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
46 #include <linux/qed/common_hsi.h>
47 #include "qedr_hsi_rdma.h"
48 #include <linux/qed/qed_if.h>
49 #include "qedr.h"
50 #include "verbs.h"
51 #include <rdma/qedr-abi.h>
52 #include "qedr_roce_cm.h"
/* Shift a doorbell PWM offset left by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
56 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
57 size_t len)
59 size_t min_len = min_t(size_t, len, udata->outlen);
61 return ib_copy_to_udata(udata, src, min_len);
64 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
66 if (index > QEDR_ROCE_PKEY_TABLE_LEN)
67 return -EINVAL;
69 *pkey = QEDR_ROCE_PKEY_DEFAULT;
70 return 0;
73 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
74 int index, union ib_gid *sgid)
76 struct qedr_dev *dev = get_qedr_dev(ibdev);
78 memset(sgid->raw, 0, sizeof(sgid->raw));
79 ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
81 DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
82 sgid->global.interface_id, sgid->global.subnet_prefix);
84 return 0;
87 int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
88 union ib_gid *sgid)
90 struct qedr_dev *dev = get_qedr_dev(ibdev);
91 int rc = 0;
93 if (!rdma_cap_roce_gid_table(ibdev, port))
94 return -ENODEV;
96 rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
97 if (rc == -EAGAIN) {
98 memcpy(sgid, &zgid, sizeof(*sgid));
99 return 0;
102 DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
103 sgid->global.interface_id, sgid->global.subnet_prefix);
105 return rc;
108 int qedr_add_gid(struct ib_device *device, u8 port_num,
109 unsigned int index, const union ib_gid *gid,
110 const struct ib_gid_attr *attr, void **context)
112 if (!rdma_cap_roce_gid_table(device, port_num))
113 return -EINVAL;
115 if (port_num > QEDR_MAX_PORT)
116 return -EINVAL;
118 if (!context)
119 return -EINVAL;
121 return 0;
124 int qedr_del_gid(struct ib_device *device, u8 port_num,
125 unsigned int index, void **context)
127 if (!rdma_cap_roce_gid_table(device, port_num))
128 return -EINVAL;
130 if (port_num > QEDR_MAX_PORT)
131 return -EINVAL;
133 if (!context)
134 return -EINVAL;
136 return 0;
139 int qedr_query_device(struct ib_device *ibdev,
140 struct ib_device_attr *attr, struct ib_udata *udata)
142 struct qedr_dev *dev = get_qedr_dev(ibdev);
143 struct qedr_device_attr *qattr = &dev->attr;
145 if (!dev->rdma_ctx) {
146 DP_ERR(dev,
147 "qedr_query_device called with invalid params rdma_ctx=%p\n",
148 dev->rdma_ctx);
149 return -EINVAL;
152 memset(attr, 0, sizeof(*attr));
154 attr->fw_ver = qattr->fw_ver;
155 attr->sys_image_guid = qattr->sys_image_guid;
156 attr->max_mr_size = qattr->max_mr_size;
157 attr->page_size_cap = qattr->page_size_caps;
158 attr->vendor_id = qattr->vendor_id;
159 attr->vendor_part_id = qattr->vendor_part_id;
160 attr->hw_ver = qattr->hw_ver;
161 attr->max_qp = qattr->max_qp;
162 attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
163 attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
164 IB_DEVICE_RC_RNR_NAK_GEN |
165 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
167 attr->max_sge = qattr->max_sge;
168 attr->max_sge_rd = qattr->max_sge;
169 attr->max_cq = qattr->max_cq;
170 attr->max_cqe = qattr->max_cqe;
171 attr->max_mr = qattr->max_mr;
172 attr->max_mw = qattr->max_mw;
173 attr->max_pd = qattr->max_pd;
174 attr->atomic_cap = dev->atomic_cap;
175 attr->max_fmr = qattr->max_fmr;
176 attr->max_map_per_fmr = 16;
177 attr->max_qp_init_rd_atom =
178 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
179 attr->max_qp_rd_atom =
180 min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
181 attr->max_qp_init_rd_atom);
183 attr->max_srq = qattr->max_srq;
184 attr->max_srq_sge = qattr->max_srq_sge;
185 attr->max_srq_wr = qattr->max_srq_wr;
187 attr->local_ca_ack_delay = qattr->dev_ack_delay;
188 attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
189 attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
190 attr->max_ah = qattr->max_ah;
192 return 0;
195 #define QEDR_SPEED_SDR (1)
196 #define QEDR_SPEED_DDR (2)
197 #define QEDR_SPEED_QDR (4)
198 #define QEDR_SPEED_FDR10 (8)
199 #define QEDR_SPEED_FDR (16)
200 #define QEDR_SPEED_EDR (32)
202 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
203 u8 *ib_width)
205 switch (speed) {
206 case 1000:
207 *ib_speed = QEDR_SPEED_SDR;
208 *ib_width = IB_WIDTH_1X;
209 break;
210 case 10000:
211 *ib_speed = QEDR_SPEED_QDR;
212 *ib_width = IB_WIDTH_1X;
213 break;
215 case 20000:
216 *ib_speed = QEDR_SPEED_DDR;
217 *ib_width = IB_WIDTH_4X;
218 break;
220 case 25000:
221 *ib_speed = QEDR_SPEED_EDR;
222 *ib_width = IB_WIDTH_1X;
223 break;
225 case 40000:
226 *ib_speed = QEDR_SPEED_QDR;
227 *ib_width = IB_WIDTH_4X;
228 break;
230 case 50000:
231 *ib_speed = QEDR_SPEED_QDR;
232 *ib_width = IB_WIDTH_4X;
233 break;
235 case 100000:
236 *ib_speed = QEDR_SPEED_EDR;
237 *ib_width = IB_WIDTH_4X;
238 break;
240 default:
241 /* Unsupported */
242 *ib_speed = QEDR_SPEED_SDR;
243 *ib_width = IB_WIDTH_1X;
247 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
249 struct qedr_dev *dev;
250 struct qed_rdma_port *rdma_port;
252 dev = get_qedr_dev(ibdev);
253 if (port > 1) {
254 DP_ERR(dev, "invalid_port=0x%x\n", port);
255 return -EINVAL;
258 if (!dev->rdma_ctx) {
259 DP_ERR(dev, "rdma_ctx is NULL\n");
260 return -EINVAL;
263 rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
265 /* *attr being zeroed by the caller, avoid zeroing it here */
266 if (rdma_port->port_state == QED_RDMA_PORT_UP) {
267 attr->state = IB_PORT_ACTIVE;
268 attr->phys_state = 5;
269 } else {
270 attr->state = IB_PORT_DOWN;
271 attr->phys_state = 3;
273 attr->max_mtu = IB_MTU_4096;
274 attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
275 attr->lid = 0;
276 attr->lmc = 0;
277 attr->sm_lid = 0;
278 attr->sm_sl = 0;
279 attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
280 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
281 attr->gid_tbl_len = 1;
282 attr->pkey_tbl_len = 1;
283 } else {
284 attr->gid_tbl_len = QEDR_MAX_SGID;
285 attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
287 attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
288 attr->qkey_viol_cntr = 0;
289 get_link_speed_and_width(rdma_port->link_speed,
290 &attr->active_speed, &attr->active_width);
291 attr->max_msg_sz = rdma_port->max_msg_size;
292 attr->max_vl_num = 4;
294 return 0;
297 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
298 struct ib_port_modify *props)
300 struct qedr_dev *dev;
302 dev = get_qedr_dev(ibdev);
303 if (port > 1) {
304 DP_ERR(dev, "invalid_port=0x%x\n", port);
305 return -EINVAL;
308 return 0;
311 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
312 unsigned long len)
314 struct qedr_mm *mm;
316 mm = kzalloc(sizeof(*mm), GFP_KERNEL);
317 if (!mm)
318 return -ENOMEM;
320 mm->key.phy_addr = phy_addr;
321 /* This function might be called with a length which is not a multiple
322 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
323 * forces this granularity by increasing the requested size if needed.
324 * When qedr_mmap is called, it will search the list with the updated
325 * length as a key. To prevent search failures, the length is rounded up
326 * in advance to PAGE_SIZE.
328 mm->key.len = roundup(len, PAGE_SIZE);
329 INIT_LIST_HEAD(&mm->entry);
331 mutex_lock(&uctx->mm_list_lock);
332 list_add(&mm->entry, &uctx->mm_head);
333 mutex_unlock(&uctx->mm_list_lock);
335 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
336 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
337 (unsigned long long)mm->key.phy_addr,
338 (unsigned long)mm->key.len, uctx);
340 return 0;
343 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
344 unsigned long len)
346 bool found = false;
347 struct qedr_mm *mm;
349 mutex_lock(&uctx->mm_list_lock);
350 list_for_each_entry(mm, &uctx->mm_head, entry) {
351 if (len != mm->key.len || phy_addr != mm->key.phy_addr)
352 continue;
354 found = true;
355 break;
357 mutex_unlock(&uctx->mm_list_lock);
358 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
359 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
360 mm->key.phy_addr, mm->key.len, uctx, found);
362 return found;
365 struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
366 struct ib_udata *udata)
368 int rc;
369 struct qedr_ucontext *ctx;
370 struct qedr_alloc_ucontext_resp uresp;
371 struct qedr_dev *dev = get_qedr_dev(ibdev);
372 struct qed_rdma_add_user_out_params oparams;
374 if (!udata)
375 return ERR_PTR(-EFAULT);
377 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
378 if (!ctx)
379 return ERR_PTR(-ENOMEM);
381 rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
382 if (rc) {
383 DP_ERR(dev,
384 "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
385 rc);
386 goto err;
389 ctx->dpi = oparams.dpi;
390 ctx->dpi_addr = oparams.dpi_addr;
391 ctx->dpi_phys_addr = oparams.dpi_phys_addr;
392 ctx->dpi_size = oparams.dpi_size;
393 INIT_LIST_HEAD(&ctx->mm_head);
394 mutex_init(&ctx->mm_list_lock);
396 memset(&uresp, 0, sizeof(uresp));
398 uresp.dpm_enabled = dev->user_dpm_enabled;
399 uresp.wids_enabled = 1;
400 uresp.wid_count = oparams.wid_count;
401 uresp.db_pa = ctx->dpi_phys_addr;
402 uresp.db_size = ctx->dpi_size;
403 uresp.max_send_wr = dev->attr.max_sqe;
404 uresp.max_recv_wr = dev->attr.max_rqe;
405 uresp.max_srq_wr = dev->attr.max_srq_wr;
406 uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
407 uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
408 uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
409 uresp.max_cqes = QEDR_MAX_CQES;
411 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
412 if (rc)
413 goto err;
415 ctx->dev = dev;
417 rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
418 if (rc)
419 goto err;
421 DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
422 &ctx->ibucontext);
423 return &ctx->ibucontext;
425 err:
426 kfree(ctx);
427 return ERR_PTR(rc);
430 int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
432 struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
433 struct qedr_mm *mm, *tmp;
434 int status = 0;
436 DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
437 uctx);
438 uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
440 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
441 DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
442 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
443 mm->key.phy_addr, mm->key.len, uctx);
444 list_del(&mm->entry);
445 kfree(mm);
448 kfree(uctx);
449 return status;
452 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
454 struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
455 struct qedr_dev *dev = get_qedr_dev(context->device);
456 unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
457 u64 unmapped_db = dev->db_phys_addr;
458 unsigned long len = (vma->vm_end - vma->vm_start);
459 int rc = 0;
460 bool found;
462 DP_DEBUG(dev, QEDR_MSG_INIT,
463 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
464 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
465 if (vma->vm_start & (PAGE_SIZE - 1)) {
466 DP_ERR(dev, "Vma_start not page aligned = %ld\n",
467 vma->vm_start);
468 return -EINVAL;
471 found = qedr_search_mmap(ucontext, vm_page, len);
472 if (!found) {
473 DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
474 vma->vm_pgoff);
475 return -EINVAL;
478 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
480 if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
481 dev->db_size))) {
482 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
483 if (vma->vm_flags & VM_READ) {
484 DP_ERR(dev, "Trying to map doorbell bar for read\n");
485 return -EPERM;
488 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
490 rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
491 PAGE_SIZE, vma->vm_page_prot);
492 } else {
493 DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
494 rc = remap_pfn_range(vma, vma->vm_start,
495 vma->vm_pgoff, len, vma->vm_page_prot);
497 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
498 return rc;
501 struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
502 struct ib_ucontext *context, struct ib_udata *udata)
504 struct qedr_dev *dev = get_qedr_dev(ibdev);
505 struct qedr_pd *pd;
506 u16 pd_id;
507 int rc;
509 DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
510 (udata && context) ? "User Lib" : "Kernel");
512 if (!dev->rdma_ctx) {
513 DP_ERR(dev, "invalid RDMA context\n");
514 return ERR_PTR(-EINVAL);
517 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
518 if (!pd)
519 return ERR_PTR(-ENOMEM);
521 rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
522 if (rc)
523 goto err;
525 pd->pd_id = pd_id;
527 if (udata && context) {
528 struct qedr_alloc_pd_uresp uresp;
530 uresp.pd_id = pd_id;
532 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
533 if (rc) {
534 DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
535 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
536 goto err;
539 pd->uctx = get_qedr_ucontext(context);
540 pd->uctx->pd = pd;
543 return &pd->ibpd;
545 err:
546 kfree(pd);
547 return ERR_PTR(rc);
550 int qedr_dealloc_pd(struct ib_pd *ibpd)
552 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
553 struct qedr_pd *pd = get_qedr_pd(ibpd);
555 if (!pd) {
556 pr_err("Invalid PD received in dealloc_pd\n");
557 return -EINVAL;
560 DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
561 dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
563 kfree(pd);
565 return 0;
568 static void qedr_free_pbl(struct qedr_dev *dev,
569 struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
571 struct pci_dev *pdev = dev->pdev;
572 int i;
574 for (i = 0; i < pbl_info->num_pbls; i++) {
575 if (!pbl[i].va)
576 continue;
577 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
578 pbl[i].va, pbl[i].pa);
581 kfree(pbl);
/* Bounds on the size of a single PBL page. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Number of 64-bit page-buffer entries (PBEs) that fit in a PBL page of
 * _page_size bytes. The argument and the whole expansion are parenthesized
 * so the macro is safe with expression arguments and inside larger
 * expressions.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
/* Capacity of a two-layer PBL: a full first layer of full pages. */
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
591 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
592 struct qedr_pbl_info *pbl_info,
593 gfp_t flags)
595 struct pci_dev *pdev = dev->pdev;
596 struct qedr_pbl *pbl_table;
597 dma_addr_t *pbl_main_tbl;
598 dma_addr_t pa;
599 void *va;
600 int i;
602 pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
603 if (!pbl_table)
604 return ERR_PTR(-ENOMEM);
606 for (i = 0; i < pbl_info->num_pbls; i++) {
607 va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
608 &pa, flags);
609 if (!va)
610 goto err;
612 pbl_table[i].va = va;
613 pbl_table[i].pa = pa;
616 /* Two-Layer PBLs, if we have more than one pbl we need to initialize
617 * the first one with physical pointers to all of the rest
619 pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
620 for (i = 0; i < pbl_info->num_pbls - 1; i++)
621 pbl_main_tbl[i] = pbl_table[i + 1].pa;
623 return pbl_table;
625 err:
626 for (i--; i >= 0; i--)
627 dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
628 pbl_table[i].va, pbl_table[i].pa);
630 qedr_free_pbl(dev, pbl_info, pbl_table);
632 return ERR_PTR(-ENOMEM);
635 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
636 struct qedr_pbl_info *pbl_info,
637 u32 num_pbes, int two_layer_capable)
639 u32 pbl_capacity;
640 u32 pbl_size;
641 u32 num_pbls;
643 if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
644 if (num_pbes > MAX_PBES_TWO_LAYER) {
645 DP_ERR(dev, "prepare pbl table: too many pages %d\n",
646 num_pbes);
647 return -EINVAL;
650 /* calculate required pbl page size */
651 pbl_size = MIN_FW_PBL_PAGE_SIZE;
652 pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
653 NUM_PBES_ON_PAGE(pbl_size);
655 while (pbl_capacity < num_pbes) {
656 pbl_size *= 2;
657 pbl_capacity = pbl_size / sizeof(u64);
658 pbl_capacity = pbl_capacity * pbl_capacity;
661 num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
662 num_pbls++; /* One for the layer0 ( points to the pbls) */
663 pbl_info->two_layered = true;
664 } else {
665 /* One layered PBL */
666 num_pbls = 1;
667 pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
668 roundup_pow_of_two((num_pbes * sizeof(u64))));
669 pbl_info->two_layered = false;
672 pbl_info->num_pbls = num_pbls;
673 pbl_info->pbl_size = pbl_size;
674 pbl_info->num_pbes = num_pbes;
676 DP_DEBUG(dev, QEDR_MSG_MR,
677 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
678 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
680 return 0;
683 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
684 struct qedr_pbl *pbl,
685 struct qedr_pbl_info *pbl_info, u32 pg_shift)
687 int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
688 u32 fw_pg_cnt, fw_pg_per_umem_pg;
689 struct qedr_pbl *pbl_tbl;
690 struct scatterlist *sg;
691 struct regpair *pbe;
692 u64 pg_addr;
693 int entry;
695 if (!pbl_info->num_pbes)
696 return;
698 /* If we have a two layered pbl, the first pbl points to the rest
699 * of the pbls and the first entry lays on the second pbl in the table
701 if (pbl_info->two_layered)
702 pbl_tbl = &pbl[1];
703 else
704 pbl_tbl = pbl;
706 pbe = (struct regpair *)pbl_tbl->va;
707 if (!pbe) {
708 DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
709 return;
712 pbe_cnt = 0;
714 shift = umem->page_shift;
716 fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);
718 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
719 pages = sg_dma_len(sg) >> shift;
720 pg_addr = sg_dma_address(sg);
721 for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
722 for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
723 pbe->lo = cpu_to_le32(pg_addr);
724 pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
726 pg_addr += BIT(pg_shift);
727 pbe_cnt++;
728 total_num_pbes++;
729 pbe++;
731 if (total_num_pbes == pbl_info->num_pbes)
732 return;
734 /* If the given pbl is full storing the pbes,
735 * move to next pbl.
737 if (pbe_cnt ==
738 (pbl_info->pbl_size / sizeof(u64))) {
739 pbl_tbl++;
740 pbe = (struct regpair *)pbl_tbl->va;
741 pbe_cnt = 0;
744 fw_pg_cnt++;
750 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
751 struct qedr_cq *cq, struct ib_udata *udata)
753 struct qedr_create_cq_uresp uresp;
754 int rc;
756 memset(&uresp, 0, sizeof(uresp));
758 uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
759 uresp.icid = cq->icid;
761 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
762 if (rc)
763 DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
765 return rc;
768 static void consume_cqe(struct qedr_cq *cq)
770 if (cq->latest_cqe == cq->toggle_cqe)
771 cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
773 cq->latest_cqe = qed_chain_consume(&cq->pbl);
776 static inline int qedr_align_cq_entries(int entries)
778 u64 size, aligned_size;
780 /* We allocate an extra entry that we don't report to the FW. */
781 size = (entries + 1) * QEDR_CQE_SIZE;
782 aligned_size = ALIGN(size, PAGE_SIZE);
784 return aligned_size / QEDR_CQE_SIZE;
787 static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
788 struct qedr_dev *dev,
789 struct qedr_userq *q,
790 u64 buf_addr, size_t buf_len,
791 int access, int dmasync,
792 int alloc_and_init)
794 u32 fw_pages;
795 int rc;
797 q->buf_addr = buf_addr;
798 q->buf_len = buf_len;
799 q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
800 if (IS_ERR(q->umem)) {
801 DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
802 PTR_ERR(q->umem));
803 return PTR_ERR(q->umem);
806 fw_pages = ib_umem_page_count(q->umem) <<
807 (q->umem->page_shift - FW_PAGE_SHIFT);
809 rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
810 if (rc)
811 goto err0;
813 if (alloc_and_init) {
814 q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
815 if (IS_ERR(q->pbl_tbl)) {
816 rc = PTR_ERR(q->pbl_tbl);
817 goto err0;
819 qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
820 FW_PAGE_SHIFT);
821 } else {
822 q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
823 if (!q->pbl_tbl) {
824 rc = -ENOMEM;
825 goto err0;
829 return 0;
831 err0:
832 ib_umem_release(q->umem);
833 q->umem = NULL;
835 return rc;
838 static inline void qedr_init_cq_params(struct qedr_cq *cq,
839 struct qedr_ucontext *ctx,
840 struct qedr_dev *dev, int vector,
841 int chain_entries, int page_cnt,
842 u64 pbl_ptr,
843 struct qed_rdma_create_cq_in_params
844 *params)
846 memset(params, 0, sizeof(*params));
847 params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
848 params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
849 params->cnq_id = vector;
850 params->cq_size = chain_entries - 1;
851 params->dpi = (ctx) ? ctx->dpi : dev->dpi;
852 params->pbl_num_pages = page_cnt;
853 params->pbl_ptr = pbl_ptr;
854 params->pbl_two_level = 0;
857 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
859 /* Flush data before signalling doorbell */
860 wmb();
861 cq->db.data.agg_flags = flags;
862 cq->db.data.value = cpu_to_le32(cons);
863 writeq(cq->db.raw, cq->db_addr);
865 /* Make sure write would stick */
866 mmiowb();
869 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
871 struct qedr_cq *cq = get_qedr_cq(ibcq);
872 unsigned long sflags;
873 struct qedr_dev *dev;
875 dev = get_qedr_dev(ibcq->device);
877 if (cq->destroyed) {
878 DP_ERR(dev,
879 "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
880 cq, cq->icid);
881 return -EINVAL;
885 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
886 return 0;
888 spin_lock_irqsave(&cq->cq_lock, sflags);
890 cq->arm_flags = 0;
892 if (flags & IB_CQ_SOLICITED)
893 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
895 if (flags & IB_CQ_NEXT_COMP)
896 cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
898 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
900 spin_unlock_irqrestore(&cq->cq_lock, sflags);
902 return 0;
905 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
906 const struct ib_cq_init_attr *attr,
907 struct ib_ucontext *ib_ctx, struct ib_udata *udata)
909 struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
910 struct qed_rdma_destroy_cq_out_params destroy_oparams;
911 struct qed_rdma_destroy_cq_in_params destroy_iparams;
912 struct qedr_dev *dev = get_qedr_dev(ibdev);
913 struct qed_rdma_create_cq_in_params params;
914 struct qedr_create_cq_ureq ureq;
915 int vector = attr->comp_vector;
916 int entries = attr->cqe;
917 struct qedr_cq *cq;
918 int chain_entries;
919 int page_cnt;
920 u64 pbl_ptr;
921 u16 icid;
922 int rc;
924 DP_DEBUG(dev, QEDR_MSG_INIT,
925 "create_cq: called from %s. entries=%d, vector=%d\n",
926 udata ? "User Lib" : "Kernel", entries, vector);
928 if (entries > QEDR_MAX_CQES) {
929 DP_ERR(dev,
930 "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
931 entries, QEDR_MAX_CQES);
932 return ERR_PTR(-EINVAL);
935 chain_entries = qedr_align_cq_entries(entries);
936 chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
938 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
939 if (!cq)
940 return ERR_PTR(-ENOMEM);
942 if (udata) {
943 memset(&ureq, 0, sizeof(ureq));
944 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
945 DP_ERR(dev,
946 "create cq: problem copying data from user space\n");
947 goto err0;
950 if (!ureq.len) {
951 DP_ERR(dev,
952 "create cq: cannot create a cq with 0 entries\n");
953 goto err0;
956 cq->cq_type = QEDR_CQ_TYPE_USER;
958 rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
959 ureq.len, IB_ACCESS_LOCAL_WRITE,
960 1, 1);
961 if (rc)
962 goto err0;
964 pbl_ptr = cq->q.pbl_tbl->pa;
965 page_cnt = cq->q.pbl_info.num_pbes;
967 cq->ibcq.cqe = chain_entries;
968 } else {
969 cq->cq_type = QEDR_CQ_TYPE_KERNEL;
971 rc = dev->ops->common->chain_alloc(dev->cdev,
972 QED_CHAIN_USE_TO_CONSUME,
973 QED_CHAIN_MODE_PBL,
974 QED_CHAIN_CNT_TYPE_U32,
975 chain_entries,
976 sizeof(union rdma_cqe),
977 &cq->pbl, NULL);
978 if (rc)
979 goto err1;
981 page_cnt = qed_chain_get_page_cnt(&cq->pbl);
982 pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
983 cq->ibcq.cqe = cq->pbl.capacity;
986 qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
987 pbl_ptr, &params);
989 rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
990 if (rc)
991 goto err2;
993 cq->icid = icid;
994 cq->sig = QEDR_CQ_MAGIC_NUMBER;
995 spin_lock_init(&cq->cq_lock);
997 if (ib_ctx) {
998 rc = qedr_copy_cq_uresp(dev, cq, udata);
999 if (rc)
1000 goto err3;
1001 } else {
1002 /* Generate doorbell address. */
1003 cq->db_addr = dev->db_addr +
1004 DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
1005 cq->db.data.icid = cq->icid;
1006 cq->db.data.params = DB_AGG_CMD_SET <<
1007 RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
1009 /* point to the very last element, passing it we will toggle */
1010 cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
1011 cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
1012 cq->latest_cqe = NULL;
1013 consume_cqe(cq);
1014 cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
1017 DP_DEBUG(dev, QEDR_MSG_CQ,
1018 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
1019 cq->icid, cq, params.cq_size);
1021 return &cq->ibcq;
1023 err3:
1024 destroy_iparams.icid = cq->icid;
1025 dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
1026 &destroy_oparams);
1027 err2:
1028 if (udata)
1029 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1030 else
1031 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1032 err1:
1033 if (udata)
1034 ib_umem_release(cq->q.umem);
1035 err0:
1036 kfree(cq);
1037 return ERR_PTR(-EINVAL);
1040 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
1042 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1043 struct qedr_cq *cq = get_qedr_cq(ibcq);
1045 DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
1047 return 0;
1050 #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
1051 #define QEDR_DESTROY_CQ_ITER_DURATION (10)
1053 int qedr_destroy_cq(struct ib_cq *ibcq)
1055 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
1056 struct qed_rdma_destroy_cq_out_params oparams;
1057 struct qed_rdma_destroy_cq_in_params iparams;
1058 struct qedr_cq *cq = get_qedr_cq(ibcq);
1059 int iter;
1060 int rc;
1062 DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
1064 cq->destroyed = 1;
1066 /* GSIs CQs are handled by driver, so they don't exist in the FW */
1067 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
1068 goto done;
1070 iparams.icid = cq->icid;
1071 rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
1072 if (rc)
1073 return rc;
1075 dev->ops->common->chain_free(dev->cdev, &cq->pbl);
1077 if (ibcq->uobject && ibcq->uobject->context) {
1078 qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
1079 ib_umem_release(cq->q.umem);
1082 /* We don't want the IRQ handler to handle a non-existing CQ so we
1083 * wait until all CNQ interrupts, if any, are received. This will always
1084 * happen and will always happen very fast. If not, then a serious error
1085 * has occured. That is why we can use a long delay.
1086 * We spin for a short time so we don’t lose time on context switching
1087 * in case all the completions are handled in that span. Otherwise
1088 * we sleep for a while and check again. Since the CNQ may be
1089 * associated with (only) the current CPU we use msleep to allow the
1090 * current CPU to be freed.
1091 * The CNQ notification is increased in qedr_irq_handler().
1093 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1094 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1095 udelay(QEDR_DESTROY_CQ_ITER_DURATION);
1096 iter--;
1099 iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
1100 while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
1101 msleep(QEDR_DESTROY_CQ_ITER_DURATION);
1102 iter--;
1105 if (oparams.num_cq_notif != cq->cnq_notif)
1106 goto err;
1108 /* Note that we don't need to have explicit code to wait for the
1109 * completion of the event handler because it is invoked from the EQ.
1110 * Since the destroy CQ ramrod has also been received on the EQ we can
1111 * be certain that there's no event handler in process.
1113 done:
1114 cq->sig = ~cq->sig;
1116 kfree(cq);
1118 return 0;
1120 err:
1121 DP_ERR(dev,
1122 "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
1123 cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);
1125 return -EINVAL;
1128 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1129 struct ib_qp_attr *attr,
1130 int attr_mask,
1131 struct qed_rdma_modify_qp_in_params
1132 *qp_params)
1134 enum rdma_network_type nw_type;
1135 struct ib_gid_attr gid_attr;
1136 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1137 union ib_gid gid;
1138 u32 ipv4_addr;
1139 int rc = 0;
1140 int i;
1142 rc = ib_get_cached_gid(ibqp->device,
1143 rdma_ah_get_port_num(&attr->ah_attr),
1144 grh->sgid_index, &gid, &gid_attr);
1145 if (rc)
1146 return rc;
1148 if (!memcmp(&gid, &zgid, sizeof(gid)))
1149 return -ENOENT;
1151 if (gid_attr.ndev) {
1152 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1154 dev_put(gid_attr.ndev);
1155 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1156 switch (nw_type) {
1157 case RDMA_NETWORK_IPV6:
1158 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1159 sizeof(qp_params->sgid));
1160 memcpy(&qp_params->dgid.bytes[0],
1161 &grh->dgid,
1162 sizeof(qp_params->dgid));
1163 qp_params->roce_mode = ROCE_V2_IPV6;
1164 SET_FIELD(qp_params->modify_flags,
1165 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1166 break;
1167 case RDMA_NETWORK_IB:
1168 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
1169 sizeof(qp_params->sgid));
1170 memcpy(&qp_params->dgid.bytes[0],
1171 &grh->dgid,
1172 sizeof(qp_params->dgid));
1173 qp_params->roce_mode = ROCE_V1;
1174 break;
1175 case RDMA_NETWORK_IPV4:
1176 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1177 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1178 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
1179 qp_params->sgid.ipv4_addr = ipv4_addr;
1180 ipv4_addr =
1181 qedr_get_ipv4_from_gid(grh->dgid.raw);
1182 qp_params->dgid.ipv4_addr = ipv4_addr;
1183 SET_FIELD(qp_params->modify_flags,
1184 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1185 qp_params->roce_mode = ROCE_V2_IPV4;
1186 break;
1190 for (i = 0; i < 4; i++) {
1191 qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1192 qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1195 if (qp_params->vlan_id >= VLAN_CFI_MASK)
1196 qp_params->vlan_id = 0;
1198 return 0;
1201 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1202 struct ib_qp_init_attr *attrs)
1204 struct qedr_device_attr *qattr = &dev->attr;
1206 /* QP0... attrs->qp_type == IB_QPT_GSI */
1207 if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1208 DP_DEBUG(dev, QEDR_MSG_QP,
1209 "create qp: unsupported qp type=0x%x requested\n",
1210 attrs->qp_type);
1211 return -EINVAL;
1214 if (attrs->cap.max_send_wr > qattr->max_sqe) {
1215 DP_ERR(dev,
1216 "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1217 attrs->cap.max_send_wr, qattr->max_sqe);
1218 return -EINVAL;
1221 if (attrs->cap.max_inline_data > qattr->max_inline) {
1222 DP_ERR(dev,
1223 "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1224 attrs->cap.max_inline_data, qattr->max_inline);
1225 return -EINVAL;
1228 if (attrs->cap.max_send_sge > qattr->max_sge) {
1229 DP_ERR(dev,
1230 "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1231 attrs->cap.max_send_sge, qattr->max_sge);
1232 return -EINVAL;
1235 if (attrs->cap.max_recv_sge > qattr->max_sge) {
1236 DP_ERR(dev,
1237 "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1238 attrs->cap.max_recv_sge, qattr->max_sge);
1239 return -EINVAL;
1242 /* Unprivileged user space cannot create special QP */
1243 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
1244 DP_ERR(dev,
1245 "create qp: userspace can't create special QPs of type=0x%x\n",
1246 attrs->qp_type);
1247 return -EINVAL;
1250 return 0;
1253 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1254 struct qedr_create_qp_uresp *uresp,
1255 struct qedr_qp *qp)
1257 /* iWARP requires two doorbells per RQ. */
1258 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1259 uresp->rq_db_offset =
1260 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1261 uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1262 } else {
1263 uresp->rq_db_offset =
1264 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1267 uresp->rq_icid = qp->icid;
1270 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1271 struct qedr_create_qp_uresp *uresp,
1272 struct qedr_qp *qp)
1274 uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1276 /* iWARP uses the same cid for rq and sq */
1277 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1278 uresp->sq_icid = qp->icid;
1279 else
1280 uresp->sq_icid = qp->icid + 1;
1283 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1284 struct qedr_qp *qp, struct ib_udata *udata)
1286 struct qedr_create_qp_uresp uresp;
1287 int rc;
1289 memset(&uresp, 0, sizeof(uresp));
1290 qedr_copy_sq_uresp(dev, &uresp, qp);
1291 qedr_copy_rq_uresp(dev, &uresp, qp);
1293 uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1294 uresp.qp_id = qp->qp_id;
1296 rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1297 if (rc)
1298 DP_ERR(dev,
1299 "create qp: failed a copy to user space with qp icid=0x%x.\n",
1300 qp->icid);
1302 return rc;
1305 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1306 struct qedr_qp *qp,
1307 struct qedr_pd *pd,
1308 struct ib_qp_init_attr *attrs)
1310 spin_lock_init(&qp->q_lock);
1311 atomic_set(&qp->refcnt, 1);
1312 qp->pd = pd;
1313 qp->qp_type = attrs->qp_type;
1314 qp->max_inline_data = attrs->cap.max_inline_data;
1315 qp->sq.max_sges = attrs->cap.max_send_sge;
1316 qp->state = QED_ROCE_QP_STATE_RESET;
1317 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1318 qp->sq_cq = get_qedr_cq(attrs->send_cq);
1319 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1320 qp->dev = dev;
1321 qp->rq.max_sges = attrs->cap.max_recv_sge;
1323 DP_DEBUG(dev, QEDR_MSG_QP,
1324 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1325 qp->rq.max_sges, qp->rq_cq->icid);
1326 DP_DEBUG(dev, QEDR_MSG_QP,
1327 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1328 pd->pd_id, qp->qp_type, qp->max_inline_data,
1329 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1330 DP_DEBUG(dev, QEDR_MSG_QP,
1331 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1332 qp->sq.max_sges, qp->sq_cq->icid);
1335 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1337 qp->sq.db = dev->db_addr +
1338 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1339 qp->sq.db_data.data.icid = qp->icid + 1;
1340 qp->rq.db = dev->db_addr +
1341 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1342 qp->rq.db_data.data.icid = qp->icid;
1345 static inline void
1346 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1347 struct qedr_pd *pd,
1348 struct qedr_qp *qp,
1349 struct ib_qp_init_attr *attrs,
1350 bool fmr_and_reserved_lkey,
1351 struct qed_rdma_create_qp_in_params *params)
1353 /* QP handle to be written in an async event */
1354 params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1355 params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1357 params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1358 params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1359 params->pd = pd->pd_id;
1360 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1361 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1362 params->stats_queue = 0;
1363 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1364 params->srq_id = 0;
1365 params->use_srq = false;
1368 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1370 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1371 "qp=%p. "
1372 "sq_addr=0x%llx, "
1373 "sq_len=%zd, "
1374 "rq_addr=0x%llx, "
1375 "rq_len=%zd"
1376 "\n",
1378 qp->usq.buf_addr,
1379 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1382 static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
1384 int rc;
1386 if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1387 return 0;
1389 idr_preload(GFP_KERNEL);
1390 spin_lock_irq(&dev->idr_lock);
1392 rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);
1394 spin_unlock_irq(&dev->idr_lock);
1395 idr_preload_end();
1397 return rc < 0 ? rc : 0;
1400 static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
1402 if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1403 return;
1405 spin_lock_irq(&dev->idr_lock);
1406 idr_remove(&dev->qpidr, id);
1407 spin_unlock_irq(&dev->idr_lock);
1410 static inline void
1411 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1412 struct qedr_qp *qp,
1413 struct qed_rdma_create_qp_out_params *out_params)
1415 qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1416 qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1418 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1419 &qp->usq.pbl_info, FW_PAGE_SHIFT);
1421 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1422 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1424 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1425 &qp->urq.pbl_info, FW_PAGE_SHIFT);
1428 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1430 if (qp->usq.umem)
1431 ib_umem_release(qp->usq.umem);
1432 qp->usq.umem = NULL;
1434 if (qp->urq.umem)
1435 ib_umem_release(qp->urq.umem);
1436 qp->urq.umem = NULL;
1439 static int qedr_create_user_qp(struct qedr_dev *dev,
1440 struct qedr_qp *qp,
1441 struct ib_pd *ibpd,
1442 struct ib_udata *udata,
1443 struct ib_qp_init_attr *attrs)
1445 struct qed_rdma_create_qp_in_params in_params;
1446 struct qed_rdma_create_qp_out_params out_params;
1447 struct qedr_pd *pd = get_qedr_pd(ibpd);
1448 struct ib_ucontext *ib_ctx = NULL;
1449 struct qedr_create_qp_ureq ureq;
1450 int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1451 int rc = -EINVAL;
1453 ib_ctx = ibpd->uobject->context;
1455 memset(&ureq, 0, sizeof(ureq));
1456 rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1457 if (rc) {
1458 DP_ERR(dev, "Problem copying data from user space\n");
1459 return rc;
1462 /* SQ - read access only (0), dma sync not required (0) */
1463 rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
1464 ureq.sq_len, 0, 0, alloc_and_init);
1465 if (rc)
1466 return rc;
1468 /* RQ - read access only (0), dma sync not required (0) */
1469 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1470 ureq.rq_len, 0, 0, alloc_and_init);
1471 if (rc)
1472 return rc;
1474 memset(&in_params, 0, sizeof(in_params));
1475 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1476 in_params.qp_handle_lo = ureq.qp_handle_lo;
1477 in_params.qp_handle_hi = ureq.qp_handle_hi;
1478 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1479 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1480 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1481 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1483 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1484 &in_params, &out_params);
1486 if (!qp->qed_qp) {
1487 rc = -ENOMEM;
1488 goto err1;
1491 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1492 qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1494 qp->qp_id = out_params.qp_id;
1495 qp->icid = out_params.icid;
1497 rc = qedr_copy_qp_uresp(dev, qp, udata);
1498 if (rc)
1499 goto err;
1501 qedr_qp_user_print(dev, qp);
1503 return 0;
1504 err:
1505 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1506 if (rc)
1507 DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1509 err1:
1510 qedr_cleanup_user(dev, qp);
1511 return rc;
1514 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1516 qp->sq.db = dev->db_addr +
1517 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1518 qp->sq.db_data.data.icid = qp->icid;
1520 qp->rq.db = dev->db_addr +
1521 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1522 qp->rq.db_data.data.icid = qp->icid;
1523 qp->rq.iwarp_db2 = dev->db_addr +
1524 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1525 qp->rq.iwarp_db2_data.data.icid = qp->icid;
1526 qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1529 static int
1530 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1531 struct qedr_qp *qp,
1532 struct qed_rdma_create_qp_in_params *in_params,
1533 u32 n_sq_elems, u32 n_rq_elems)
1535 struct qed_rdma_create_qp_out_params out_params;
1536 int rc;
1538 rc = dev->ops->common->chain_alloc(dev->cdev,
1539 QED_CHAIN_USE_TO_PRODUCE,
1540 QED_CHAIN_MODE_PBL,
1541 QED_CHAIN_CNT_TYPE_U32,
1542 n_sq_elems,
1543 QEDR_SQE_ELEMENT_SIZE,
1544 &qp->sq.pbl, NULL);
1546 if (rc)
1547 return rc;
1549 in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1550 in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1552 rc = dev->ops->common->chain_alloc(dev->cdev,
1553 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1554 QED_CHAIN_MODE_PBL,
1555 QED_CHAIN_CNT_TYPE_U32,
1556 n_rq_elems,
1557 QEDR_RQE_ELEMENT_SIZE,
1558 &qp->rq.pbl, NULL);
1559 if (rc)
1560 return rc;
1562 in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1563 in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1565 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1566 in_params, &out_params);
1568 if (!qp->qed_qp)
1569 return -EINVAL;
1571 qp->qp_id = out_params.qp_id;
1572 qp->icid = out_params.icid;
1574 qedr_set_roce_db_info(dev, qp);
1575 return rc;
1578 static int
1579 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1580 struct qedr_qp *qp,
1581 struct qed_rdma_create_qp_in_params *in_params,
1582 u32 n_sq_elems, u32 n_rq_elems)
1584 struct qed_rdma_create_qp_out_params out_params;
1585 struct qed_chain_ext_pbl ext_pbl;
1586 int rc;
1588 in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1589 QEDR_SQE_ELEMENT_SIZE,
1590 QED_CHAIN_MODE_PBL);
1591 in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1592 QEDR_RQE_ELEMENT_SIZE,
1593 QED_CHAIN_MODE_PBL);
1595 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1596 in_params, &out_params);
1598 if (!qp->qed_qp)
1599 return -EINVAL;
1601 /* Now we allocate the chain */
1602 ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1603 ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1605 rc = dev->ops->common->chain_alloc(dev->cdev,
1606 QED_CHAIN_USE_TO_PRODUCE,
1607 QED_CHAIN_MODE_PBL,
1608 QED_CHAIN_CNT_TYPE_U32,
1609 n_sq_elems,
1610 QEDR_SQE_ELEMENT_SIZE,
1611 &qp->sq.pbl, &ext_pbl);
1613 if (rc)
1614 goto err;
1616 ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1617 ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1619 rc = dev->ops->common->chain_alloc(dev->cdev,
1620 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1621 QED_CHAIN_MODE_PBL,
1622 QED_CHAIN_CNT_TYPE_U32,
1623 n_rq_elems,
1624 QEDR_RQE_ELEMENT_SIZE,
1625 &qp->rq.pbl, &ext_pbl);
1627 if (rc)
1628 goto err;
1630 qp->qp_id = out_params.qp_id;
1631 qp->icid = out_params.icid;
1633 qedr_set_iwarp_db_info(dev, qp);
1634 return rc;
1636 err:
1637 dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1639 return rc;
1642 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1644 dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1645 kfree(qp->wqe_wr_id);
1647 dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1648 kfree(qp->rqe_wr_id);
1651 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1652 struct qedr_qp *qp,
1653 struct ib_pd *ibpd,
1654 struct ib_qp_init_attr *attrs)
1656 struct qed_rdma_create_qp_in_params in_params;
1657 struct qedr_pd *pd = get_qedr_pd(ibpd);
1658 int rc = -EINVAL;
1659 u32 n_rq_elems;
1660 u32 n_sq_elems;
1661 u32 n_sq_entries;
1663 memset(&in_params, 0, sizeof(in_params));
1665 /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1666 * the ring. The ring should allow at least a single WR, even if the
1667 * user requested none, due to allocation issues.
1668 * We should add an extra WR since the prod and cons indices of
1669 * wqe_wr_id are managed in such a way that the WQ is considered full
1670 * when (prod+1)%max_wr==cons. We currently don't do that because we
1671 * double the number of entries due an iSER issue that pushes far more
1672 * WRs than indicated. If we decline its ib_post_send() then we get
1673 * error prints in the dmesg we'd like to avoid.
1675 qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1676 dev->attr.max_sqe);
1678 qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
1679 GFP_KERNEL);
1680 if (!qp->wqe_wr_id) {
1681 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1682 return -ENOMEM;
1685 /* QP handle to be written in CQE */
1686 in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1687 in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1689 /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1690 * the ring. There ring should allow at least a single WR, even if the
1691 * user requested none, due to allocation issues.
1693 qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1695 /* Allocate driver internal RQ array */
1696 qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
1697 GFP_KERNEL);
1698 if (!qp->rqe_wr_id) {
1699 DP_ERR(dev,
1700 "create qp: failed RQ shadow memory allocation\n");
1701 kfree(qp->wqe_wr_id);
1702 return -ENOMEM;
1705 qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1707 n_sq_entries = attrs->cap.max_send_wr;
1708 n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1709 n_sq_entries = max_t(u32, n_sq_entries, 1);
1710 n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1712 n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1714 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1715 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1716 n_sq_elems, n_rq_elems);
1717 else
1718 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1719 n_sq_elems, n_rq_elems);
1720 if (rc)
1721 qedr_cleanup_kernel(dev, qp);
1723 return rc;
1726 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1727 struct ib_qp_init_attr *attrs,
1728 struct ib_udata *udata)
1730 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1731 struct qedr_pd *pd = get_qedr_pd(ibpd);
1732 struct qedr_qp *qp;
1733 struct ib_qp *ibqp;
1734 int rc = 0;
1736 DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1737 udata ? "user library" : "kernel", pd);
1739 rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1740 if (rc)
1741 return ERR_PTR(rc);
1743 if (attrs->srq)
1744 return ERR_PTR(-EINVAL);
1746 DP_DEBUG(dev, QEDR_MSG_QP,
1747 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1748 udata ? "user library" : "kernel", attrs->event_handler, pd,
1749 get_qedr_cq(attrs->send_cq),
1750 get_qedr_cq(attrs->send_cq)->icid,
1751 get_qedr_cq(attrs->recv_cq),
1752 get_qedr_cq(attrs->recv_cq)->icid);
1754 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1755 if (!qp) {
1756 DP_ERR(dev, "create qp: failed allocating memory\n");
1757 return ERR_PTR(-ENOMEM);
1760 qedr_set_common_qp_params(dev, qp, pd, attrs);
1762 if (attrs->qp_type == IB_QPT_GSI) {
1763 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1764 if (IS_ERR(ibqp))
1765 kfree(qp);
1766 return ibqp;
1769 if (udata)
1770 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1771 else
1772 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1774 if (rc)
1775 goto err;
1777 qp->ibqp.qp_num = qp->qp_id;
1779 rc = qedr_idr_add(dev, qp, qp->qp_id);
1780 if (rc)
1781 goto err;
1783 return &qp->ibqp;
1785 err:
1786 kfree(qp);
1788 return ERR_PTR(-EFAULT);
1791 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1793 switch (qp_state) {
1794 case QED_ROCE_QP_STATE_RESET:
1795 return IB_QPS_RESET;
1796 case QED_ROCE_QP_STATE_INIT:
1797 return IB_QPS_INIT;
1798 case QED_ROCE_QP_STATE_RTR:
1799 return IB_QPS_RTR;
1800 case QED_ROCE_QP_STATE_RTS:
1801 return IB_QPS_RTS;
1802 case QED_ROCE_QP_STATE_SQD:
1803 return IB_QPS_SQD;
1804 case QED_ROCE_QP_STATE_ERR:
1805 return IB_QPS_ERR;
1806 case QED_ROCE_QP_STATE_SQE:
1807 return IB_QPS_SQE;
1809 return IB_QPS_ERR;
1812 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1813 enum ib_qp_state qp_state)
1815 switch (qp_state) {
1816 case IB_QPS_RESET:
1817 return QED_ROCE_QP_STATE_RESET;
1818 case IB_QPS_INIT:
1819 return QED_ROCE_QP_STATE_INIT;
1820 case IB_QPS_RTR:
1821 return QED_ROCE_QP_STATE_RTR;
1822 case IB_QPS_RTS:
1823 return QED_ROCE_QP_STATE_RTS;
1824 case IB_QPS_SQD:
1825 return QED_ROCE_QP_STATE_SQD;
1826 case IB_QPS_ERR:
1827 return QED_ROCE_QP_STATE_ERR;
1828 default:
1829 return QED_ROCE_QP_STATE_ERR;
1833 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1835 qed_chain_reset(&qph->pbl);
1836 qph->prod = 0;
1837 qph->cons = 0;
1838 qph->wqe_cons = 0;
1839 qph->db_data.data.value = cpu_to_le16(0);
1842 static int qedr_update_qp_state(struct qedr_dev *dev,
1843 struct qedr_qp *qp,
1844 enum qed_roce_qp_state new_state)
1846 int status = 0;
1848 if (new_state == qp->state)
1849 return 0;
1851 switch (qp->state) {
1852 case QED_ROCE_QP_STATE_RESET:
1853 switch (new_state) {
1854 case QED_ROCE_QP_STATE_INIT:
1855 qp->prev_wqe_size = 0;
1856 qedr_reset_qp_hwq_info(&qp->sq);
1857 qedr_reset_qp_hwq_info(&qp->rq);
1858 break;
1859 default:
1860 status = -EINVAL;
1861 break;
1863 break;
1864 case QED_ROCE_QP_STATE_INIT:
1865 switch (new_state) {
1866 case QED_ROCE_QP_STATE_RTR:
1867 /* Update doorbell (in case post_recv was
1868 * done before move to RTR)
1871 if (rdma_protocol_roce(&dev->ibdev, 1)) {
1872 wmb();
1873 writel(qp->rq.db_data.raw, qp->rq.db);
1874 /* Make sure write takes effect */
1875 mmiowb();
1877 break;
1878 case QED_ROCE_QP_STATE_ERR:
1879 break;
1880 default:
1881 /* Invalid state change. */
1882 status = -EINVAL;
1883 break;
1885 break;
1886 case QED_ROCE_QP_STATE_RTR:
1887 /* RTR->XXX */
1888 switch (new_state) {
1889 case QED_ROCE_QP_STATE_RTS:
1890 break;
1891 case QED_ROCE_QP_STATE_ERR:
1892 break;
1893 default:
1894 /* Invalid state change. */
1895 status = -EINVAL;
1896 break;
1898 break;
1899 case QED_ROCE_QP_STATE_RTS:
1900 /* RTS->XXX */
1901 switch (new_state) {
1902 case QED_ROCE_QP_STATE_SQD:
1903 break;
1904 case QED_ROCE_QP_STATE_ERR:
1905 break;
1906 default:
1907 /* Invalid state change. */
1908 status = -EINVAL;
1909 break;
1911 break;
1912 case QED_ROCE_QP_STATE_SQD:
1913 /* SQD->XXX */
1914 switch (new_state) {
1915 case QED_ROCE_QP_STATE_RTS:
1916 case QED_ROCE_QP_STATE_ERR:
1917 break;
1918 default:
1919 /* Invalid state change. */
1920 status = -EINVAL;
1921 break;
1923 break;
1924 case QED_ROCE_QP_STATE_ERR:
1925 /* ERR->XXX */
1926 switch (new_state) {
1927 case QED_ROCE_QP_STATE_RESET:
1928 if ((qp->rq.prod != qp->rq.cons) ||
1929 (qp->sq.prod != qp->sq.cons)) {
1930 DP_NOTICE(dev,
1931 "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1932 qp->rq.prod, qp->rq.cons, qp->sq.prod,
1933 qp->sq.cons);
1934 status = -EINVAL;
1936 break;
1937 default:
1938 status = -EINVAL;
1939 break;
1941 break;
1942 default:
1943 status = -EINVAL;
1944 break;
1947 return status;
1950 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1951 int attr_mask, struct ib_udata *udata)
1953 struct qedr_qp *qp = get_qedr_qp(ibqp);
1954 struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1955 struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1956 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1957 enum ib_qp_state old_qp_state, new_qp_state;
1958 int rc = 0;
1960 DP_DEBUG(dev, QEDR_MSG_QP,
1961 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
1962 attr->qp_state);
1964 old_qp_state = qedr_get_ibqp_state(qp->state);
1965 if (attr_mask & IB_QP_STATE)
1966 new_qp_state = attr->qp_state;
1967 else
1968 new_qp_state = old_qp_state;
1970 if (rdma_protocol_roce(&dev->ibdev, 1)) {
1971 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1972 ibqp->qp_type, attr_mask,
1973 IB_LINK_LAYER_ETHERNET)) {
1974 DP_ERR(dev,
1975 "modify qp: invalid attribute mask=0x%x specified for\n"
1976 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1977 attr_mask, qp->qp_id, ibqp->qp_type,
1978 old_qp_state, new_qp_state);
1979 rc = -EINVAL;
1980 goto err;
1984 /* Translate the masks... */
1985 if (attr_mask & IB_QP_STATE) {
1986 SET_FIELD(qp_params.modify_flags,
1987 QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1988 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1991 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1992 qp_params.sqd_async = true;
1994 if (attr_mask & IB_QP_PKEY_INDEX) {
1995 SET_FIELD(qp_params.modify_flags,
1996 QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1997 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1998 rc = -EINVAL;
1999 goto err;
2002 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2005 if (attr_mask & IB_QP_QKEY)
2006 qp->qkey = attr->qkey;
2008 if (attr_mask & IB_QP_ACCESS_FLAGS) {
2009 SET_FIELD(qp_params.modify_flags,
2010 QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2011 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2012 IB_ACCESS_REMOTE_READ;
2013 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2014 IB_ACCESS_REMOTE_WRITE;
2015 qp_params.incoming_atomic_en = attr->qp_access_flags &
2016 IB_ACCESS_REMOTE_ATOMIC;
2019 if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2020 if (attr_mask & IB_QP_PATH_MTU) {
2021 if (attr->path_mtu < IB_MTU_256 ||
2022 attr->path_mtu > IB_MTU_4096) {
2023 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2024 rc = -EINVAL;
2025 goto err;
2027 qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2028 ib_mtu_enum_to_int(iboe_get_mtu
2029 (dev->ndev->mtu)));
2032 if (!qp->mtu) {
2033 qp->mtu =
2034 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2035 pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2038 SET_FIELD(qp_params.modify_flags,
2039 QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2041 qp_params.traffic_class_tos = grh->traffic_class;
2042 qp_params.flow_label = grh->flow_label;
2043 qp_params.hop_limit_ttl = grh->hop_limit;
2045 qp->sgid_idx = grh->sgid_index;
2047 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2048 if (rc) {
2049 DP_ERR(dev,
2050 "modify qp: problems with GID index %d (rc=%d)\n",
2051 grh->sgid_index, rc);
2052 return rc;
2055 rc = qedr_get_dmac(dev, &attr->ah_attr,
2056 qp_params.remote_mac_addr);
2057 if (rc)
2058 return rc;
2060 qp_params.use_local_mac = true;
2061 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2063 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2064 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2065 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2066 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2067 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2068 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2069 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2070 qp_params.remote_mac_addr);
2072 qp_params.mtu = qp->mtu;
2073 qp_params.lb_indication = false;
2076 if (!qp_params.mtu) {
2077 /* Stay with current MTU */
2078 if (qp->mtu)
2079 qp_params.mtu = qp->mtu;
2080 else
2081 qp_params.mtu =
2082 ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2085 if (attr_mask & IB_QP_TIMEOUT) {
2086 SET_FIELD(qp_params.modify_flags,
2087 QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2089 qp_params.ack_timeout = attr->timeout;
2090 if (attr->timeout) {
2091 u32 temp;
2093 temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
2094 /* FW requires [msec] */
2095 qp_params.ack_timeout = temp;
2096 } else {
2097 /* Infinite */
2098 qp_params.ack_timeout = 0;
2101 if (attr_mask & IB_QP_RETRY_CNT) {
2102 SET_FIELD(qp_params.modify_flags,
2103 QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2104 qp_params.retry_cnt = attr->retry_cnt;
2107 if (attr_mask & IB_QP_RNR_RETRY) {
2108 SET_FIELD(qp_params.modify_flags,
2109 QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2110 qp_params.rnr_retry_cnt = attr->rnr_retry;
2113 if (attr_mask & IB_QP_RQ_PSN) {
2114 SET_FIELD(qp_params.modify_flags,
2115 QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2116 qp_params.rq_psn = attr->rq_psn;
2117 qp->rq_psn = attr->rq_psn;
2120 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2121 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2122 rc = -EINVAL;
2123 DP_ERR(dev,
2124 "unsupported max_rd_atomic=%d, supported=%d\n",
2125 attr->max_rd_atomic,
2126 dev->attr.max_qp_req_rd_atomic_resc);
2127 goto err;
2130 SET_FIELD(qp_params.modify_flags,
2131 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2132 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2135 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2136 SET_FIELD(qp_params.modify_flags,
2137 QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2138 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2141 if (attr_mask & IB_QP_SQ_PSN) {
2142 SET_FIELD(qp_params.modify_flags,
2143 QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2144 qp_params.sq_psn = attr->sq_psn;
2145 qp->sq_psn = attr->sq_psn;
2148 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2149 if (attr->max_dest_rd_atomic >
2150 dev->attr.max_qp_resp_rd_atomic_resc) {
2151 DP_ERR(dev,
2152 "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2153 attr->max_dest_rd_atomic,
2154 dev->attr.max_qp_resp_rd_atomic_resc);
2156 rc = -EINVAL;
2157 goto err;
2160 SET_FIELD(qp_params.modify_flags,
2161 QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2162 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2165 if (attr_mask & IB_QP_DEST_QPN) {
2166 SET_FIELD(qp_params.modify_flags,
2167 QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2169 qp_params.dest_qp = attr->dest_qp_num;
2170 qp->dest_qp_num = attr->dest_qp_num;
2173 if (qp->qp_type != IB_QPT_GSI)
2174 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2175 qp->qed_qp, &qp_params);
2177 if (attr_mask & IB_QP_STATE) {
2178 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2179 rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
2180 qp->state = qp_params.new_state;
2183 err:
2184 return rc;
2187 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2189 int ib_qp_acc_flags = 0;
2191 if (params->incoming_rdma_write_en)
2192 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2193 if (params->incoming_rdma_read_en)
2194 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2195 if (params->incoming_atomic_en)
2196 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2197 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2198 return ib_qp_acc_flags;
2201 int qedr_query_qp(struct ib_qp *ibqp,
2202 struct ib_qp_attr *qp_attr,
2203 int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2205 struct qed_rdma_query_qp_out_params params;
2206 struct qedr_qp *qp = get_qedr_qp(ibqp);
2207 struct qedr_dev *dev = qp->dev;
2208 int rc = 0;
2210 memset(&params, 0, sizeof(params));
2212 rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2213 if (rc)
2214 goto err;
2216 memset(qp_attr, 0, sizeof(*qp_attr));
2217 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2219 qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2220 qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2221 qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2222 qp_attr->path_mig_state = IB_MIG_MIGRATED;
2223 qp_attr->rq_psn = params.rq_psn;
2224 qp_attr->sq_psn = params.sq_psn;
2225 qp_attr->dest_qp_num = params.dest_qp;
2227 qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2229 qp_attr->cap.max_send_wr = qp->sq.max_wr;
2230 qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2231 qp_attr->cap.max_send_sge = qp->sq.max_sges;
2232 qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2233 qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2234 qp_init_attr->cap = qp_attr->cap;
2236 qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2237 rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2238 params.flow_label, qp->sgid_idx,
2239 params.hop_limit_ttl, params.traffic_class_tos);
2240 rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2241 rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2242 rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2243 qp_attr->timeout = params.timeout;
2244 qp_attr->rnr_retry = params.rnr_retry;
2245 qp_attr->retry_cnt = params.retry_cnt;
2246 qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2247 qp_attr->pkey_index = params.pkey_index;
2248 qp_attr->port_num = 1;
2249 rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2250 rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2251 qp_attr->alt_pkey_index = 0;
2252 qp_attr->alt_port_num = 0;
2253 qp_attr->alt_timeout = 0;
2254 memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2256 qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2257 qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2258 qp_attr->max_rd_atomic = params.max_rd_atomic;
2259 qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2261 DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2262 qp_attr->cap.max_inline_data);
2264 err:
2265 return rc;
2268 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2270 int rc = 0;
2272 if (qp->qp_type != IB_QPT_GSI) {
2273 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2274 if (rc)
2275 return rc;
2278 if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2279 qedr_cleanup_user(dev, qp);
2280 else
2281 qedr_cleanup_kernel(dev, qp);
2283 return 0;
2286 int qedr_destroy_qp(struct ib_qp *ibqp)
2288 struct qedr_qp *qp = get_qedr_qp(ibqp);
2289 struct qedr_dev *dev = qp->dev;
2290 struct ib_qp_attr attr;
2291 int attr_mask = 0;
2292 int rc = 0;
2294 DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2295 qp, qp->qp_type);
2297 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2298 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2299 (qp->state != QED_ROCE_QP_STATE_ERR) &&
2300 (qp->state != QED_ROCE_QP_STATE_INIT)) {
2302 attr.qp_state = IB_QPS_ERR;
2303 attr_mask |= IB_QP_STATE;
2305 /* Change the QP state to ERROR */
2306 qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2308 } else {
2309 /* Wait for the connect/accept to complete */
2310 if (qp->ep) {
2311 int wait_count = 1;
2313 while (qp->ep->during_connect) {
2314 DP_DEBUG(dev, QEDR_MSG_QP,
2315 "Still in during connect/accept\n");
2317 msleep(100);
2318 if (wait_count++ > 200) {
2319 DP_NOTICE(dev,
2320 "during connect timeout\n");
2321 break;
2327 if (qp->qp_type == IB_QPT_GSI)
2328 qedr_destroy_gsi_qp(dev);
2330 qedr_free_qp_resources(dev, qp);
2332 if (atomic_dec_and_test(&qp->refcnt)) {
2333 qedr_idr_remove(dev, qp->qp_id);
2334 kfree(qp);
2336 return rc;
2339 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2340 struct ib_udata *udata)
2342 struct qedr_ah *ah;
2344 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2345 if (!ah)
2346 return ERR_PTR(-ENOMEM);
2348 ah->attr = *attr;
2350 return &ah->ibah;
/* Free the qedr address handle that wraps @ibah. Always returns 0. */
int qedr_destroy_ah(struct ib_ah *ibah)
{
	kfree(get_qedr_ah(ibah));

	return 0;
}
2361 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2363 struct qedr_pbl *pbl, *tmp;
2365 if (info->pbl_table)
2366 list_add_tail(&info->pbl_table->list_entry,
2367 &info->free_pbl_list);
2369 if (!list_empty(&info->inuse_pbl_list))
2370 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2372 list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2373 list_del(&pbl->list_entry);
2374 qedr_free_pbl(dev, &info->pbl_info, pbl);
2378 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2379 size_t page_list_len, bool two_layered)
2381 struct qedr_pbl *tmp;
2382 int rc;
2384 INIT_LIST_HEAD(&info->free_pbl_list);
2385 INIT_LIST_HEAD(&info->inuse_pbl_list);
2387 rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2388 page_list_len, two_layered);
2389 if (rc)
2390 goto done;
2392 info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2393 if (IS_ERR(info->pbl_table)) {
2394 rc = PTR_ERR(info->pbl_table);
2395 goto done;
2398 DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2399 &info->pbl_table->pa);
2401 /* in usual case we use 2 PBLs, so we add one to free
2402 * list and allocating another one
2404 tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2405 if (IS_ERR(tmp)) {
2406 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2407 goto done;
2410 list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2412 DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2414 done:
2415 if (rc)
2416 free_mr_info(dev, info);
2418 return rc;
2421 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2422 u64 usr_addr, int acc, struct ib_udata *udata)
2424 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2425 struct qedr_mr *mr;
2426 struct qedr_pd *pd;
2427 int rc = -ENOMEM;
2429 pd = get_qedr_pd(ibpd);
2430 DP_DEBUG(dev, QEDR_MSG_MR,
2431 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2432 pd->pd_id, start, len, usr_addr, acc);
2434 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2435 return ERR_PTR(-EINVAL);
2437 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2438 if (!mr)
2439 return ERR_PTR(rc);
2441 mr->type = QEDR_MR_USER;
2443 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2444 if (IS_ERR(mr->umem)) {
2445 rc = -EFAULT;
2446 goto err0;
2449 rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2450 if (rc)
2451 goto err1;
2453 qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2454 &mr->info.pbl_info, mr->umem->page_shift);
2456 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2457 if (rc) {
2458 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2459 goto err1;
2462 /* Index only, 18 bit long, lkey = itid << 8 | key */
2463 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2464 mr->hw_mr.key = 0;
2465 mr->hw_mr.pd = pd->pd_id;
2466 mr->hw_mr.local_read = 1;
2467 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2468 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2469 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2470 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2471 mr->hw_mr.mw_bind = false;
2472 mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2473 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2474 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2475 mr->hw_mr.page_size_log = mr->umem->page_shift;
2476 mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2477 mr->hw_mr.length = len;
2478 mr->hw_mr.vaddr = usr_addr;
2479 mr->hw_mr.zbva = false;
2480 mr->hw_mr.phy_mr = false;
2481 mr->hw_mr.dma_mr = false;
2483 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2484 if (rc) {
2485 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2486 goto err2;
2489 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2490 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2491 mr->hw_mr.remote_atomic)
2492 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2494 DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2495 mr->ibmr.lkey);
2496 return &mr->ibmr;
2498 err2:
2499 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2500 err1:
2501 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2502 err0:
2503 kfree(mr);
2504 return ERR_PTR(rc);
2507 int qedr_dereg_mr(struct ib_mr *ib_mr)
2509 struct qedr_mr *mr = get_qedr_mr(ib_mr);
2510 struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2511 int rc = 0;
2513 rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2514 if (rc)
2515 return rc;
2517 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2519 if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2520 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2522 /* it could be user registered memory. */
2523 if (mr->umem)
2524 ib_umem_release(mr->umem);
2526 kfree(mr);
2528 return rc;
2531 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2532 int max_page_list_len)
2534 struct qedr_pd *pd = get_qedr_pd(ibpd);
2535 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2536 struct qedr_mr *mr;
2537 int rc = -ENOMEM;
2539 DP_DEBUG(dev, QEDR_MSG_MR,
2540 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2541 max_page_list_len);
2543 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2544 if (!mr)
2545 return ERR_PTR(rc);
2547 mr->dev = dev;
2548 mr->type = QEDR_MR_FRMR;
2550 rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2551 if (rc)
2552 goto err0;
2554 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2555 if (rc) {
2556 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2557 goto err0;
2560 /* Index only, 18 bit long, lkey = itid << 8 | key */
2561 mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2562 mr->hw_mr.key = 0;
2563 mr->hw_mr.pd = pd->pd_id;
2564 mr->hw_mr.local_read = 1;
2565 mr->hw_mr.local_write = 0;
2566 mr->hw_mr.remote_read = 0;
2567 mr->hw_mr.remote_write = 0;
2568 mr->hw_mr.remote_atomic = 0;
2569 mr->hw_mr.mw_bind = false;
2570 mr->hw_mr.pbl_ptr = 0;
2571 mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2572 mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2573 mr->hw_mr.fbo = 0;
2574 mr->hw_mr.length = 0;
2575 mr->hw_mr.vaddr = 0;
2576 mr->hw_mr.zbva = false;
2577 mr->hw_mr.phy_mr = true;
2578 mr->hw_mr.dma_mr = false;
2580 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2581 if (rc) {
2582 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2583 goto err1;
2586 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2587 mr->ibmr.rkey = mr->ibmr.lkey;
2589 DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2590 return mr;
2592 err1:
2593 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2594 err0:
2595 kfree(mr);
2596 return ERR_PTR(rc);
2599 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2600 enum ib_mr_type mr_type, u32 max_num_sg)
2602 struct qedr_mr *mr;
2604 if (mr_type != IB_MR_TYPE_MEM_REG)
2605 return ERR_PTR(-EINVAL);
2607 mr = __qedr_alloc_mr(ibpd, max_num_sg);
2609 if (IS_ERR(mr))
2610 return ERR_PTR(-EINVAL);
2612 return &mr->ibmr;
2615 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2617 struct qedr_mr *mr = get_qedr_mr(ibmr);
2618 struct qedr_pbl *pbl_table;
2619 struct regpair *pbe;
2620 u32 pbes_in_page;
2622 if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2623 DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
2624 return -ENOMEM;
2627 DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2628 mr->npages, addr);
2630 pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2631 pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2632 pbe = (struct regpair *)pbl_table->va;
2633 pbe += mr->npages % pbes_in_page;
2634 pbe->lo = cpu_to_le32((u32)addr);
2635 pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2637 mr->npages++;
2639 return 0;
2642 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2644 int work = info->completed - info->completed_handled - 1;
2646 DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2647 while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2648 struct qedr_pbl *pbl;
2650 /* Free all the page list that are possible to be freed
2651 * (all the ones that were invalidated), under the assumption
2652 * that if an FMR was completed successfully that means that
2653 * if there was an invalidate operation before it also ended
2655 pbl = list_first_entry(&info->inuse_pbl_list,
2656 struct qedr_pbl, list_entry);
2657 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2658 info->completed_handled++;
2662 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2663 int sg_nents, unsigned int *sg_offset)
2665 struct qedr_mr *mr = get_qedr_mr(ibmr);
2667 mr->npages = 0;
2669 handle_completed_mrs(mr->dev, &mr->info);
2670 return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2673 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2675 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2676 struct qedr_pd *pd = get_qedr_pd(ibpd);
2677 struct qedr_mr *mr;
2678 int rc;
2680 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2681 if (!mr)
2682 return ERR_PTR(-ENOMEM);
2684 mr->type = QEDR_MR_DMA;
2686 rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2687 if (rc) {
2688 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2689 goto err1;
2692 /* index only, 18 bit long, lkey = itid << 8 | key */
2693 mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2694 mr->hw_mr.pd = pd->pd_id;
2695 mr->hw_mr.local_read = 1;
2696 mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2697 mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2698 mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2699 mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2700 mr->hw_mr.dma_mr = true;
2702 rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2703 if (rc) {
2704 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2705 goto err2;
2708 mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2709 if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2710 mr->hw_mr.remote_atomic)
2711 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2713 DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2714 return &mr->ibmr;
2716 err2:
2717 dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2718 err1:
2719 kfree(mr);
2720 return ERR_PTR(rc);
2723 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2725 return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2728 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2730 int i, len = 0;
2732 for (i = 0; i < num_sge; i++)
2733 len += sg_list[i].length;
2735 return len;
2738 static void swap_wqe_data64(u64 *p)
2740 int i;
2742 for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2743 *p = cpu_to_be64(cpu_to_le64(*p));
2746 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2747 struct qedr_qp *qp, u8 *wqe_size,
2748 struct ib_send_wr *wr,
2749 struct ib_send_wr **bad_wr, u8 *bits,
2750 u8 bit)
2752 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2753 char *seg_prt, *wqe;
2754 int i, seg_siz;
2756 if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2757 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2758 *bad_wr = wr;
2759 return 0;
2762 if (!data_size)
2763 return data_size;
2765 *bits |= bit;
2767 seg_prt = NULL;
2768 wqe = NULL;
2769 seg_siz = 0;
2771 /* Copy data inline */
2772 for (i = 0; i < wr->num_sge; i++) {
2773 u32 len = wr->sg_list[i].length;
2774 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2776 while (len > 0) {
2777 u32 cur;
2779 /* New segment required */
2780 if (!seg_siz) {
2781 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2782 seg_prt = wqe;
2783 seg_siz = sizeof(struct rdma_sq_common_wqe);
2784 (*wqe_size)++;
2787 /* Calculate currently allowed length */
2788 cur = min_t(u32, len, seg_siz);
2789 memcpy(seg_prt, src, cur);
2791 /* Update segment variables */
2792 seg_prt += cur;
2793 seg_siz -= cur;
2795 /* Update sge variables */
2796 src += cur;
2797 len -= cur;
2799 /* Swap fully-completed segments */
2800 if (!seg_siz)
2801 swap_wqe_data64((u64 *)wqe);
2805 /* swap last not completed segment */
2806 if (seg_siz)
2807 swap_wqe_data64((u64 *)wqe);
2809 return data_size;
/* Fill an RQ SGE: little-endian address pair, length and flags. */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->flags = cpu_to_le32(vflags);		\
	} while (0)

/* Fill an SRQ WQE header: little-endian wr_id pair and the SGE count. */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);		\
		(hdr)->num_sges = (num_sge);			\
	} while (0)

/* Fill an SRQ SGE: little-endian address pair, length and l_key. */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, vaddr);		\
		(sge)->length = cpu_to_le32(vlength);		\
		(sge)->l_key = cpu_to_le32(vlkey);		\
	} while (0)
2832 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2833 struct ib_send_wr *wr)
2835 u32 data_size = 0;
2836 int i;
2838 for (i = 0; i < wr->num_sge; i++) {
2839 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2841 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2842 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2843 sge->length = cpu_to_le32(wr->sg_list[i].length);
2844 data_size += wr->sg_list[i].length;
2847 if (wqe_size)
2848 *wqe_size += wr->num_sge;
2850 return data_size;
2853 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2854 struct qedr_qp *qp,
2855 struct rdma_sq_rdma_wqe_1st *rwqe,
2856 struct rdma_sq_rdma_wqe_2nd *rwqe2,
2857 struct ib_send_wr *wr,
2858 struct ib_send_wr **bad_wr)
2860 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2861 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2863 if (wr->send_flags & IB_SEND_INLINE &&
2864 (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2865 wr->opcode == IB_WR_RDMA_WRITE)) {
2866 u8 flags = 0;
2868 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2869 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2870 bad_wr, &rwqe->flags, flags);
2873 return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2876 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2877 struct qedr_qp *qp,
2878 struct rdma_sq_send_wqe_1st *swqe,
2879 struct rdma_sq_send_wqe_2st *swqe2,
2880 struct ib_send_wr *wr,
2881 struct ib_send_wr **bad_wr)
2883 memset(swqe2, 0, sizeof(*swqe2));
2884 if (wr->send_flags & IB_SEND_INLINE) {
2885 u8 flags = 0;
2887 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2888 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2889 bad_wr, &swqe->flags, flags);
2892 return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2895 static int qedr_prepare_reg(struct qedr_qp *qp,
2896 struct rdma_sq_fmr_wqe_1st *fwqe1,
2897 struct ib_reg_wr *wr)
2899 struct qedr_mr *mr = get_qedr_mr(wr->mr);
2900 struct rdma_sq_fmr_wqe_2nd *fwqe2;
2902 fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2903 fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2904 fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2905 fwqe1->l_key = wr->key;
2907 fwqe2->access_ctrl = 0;
2909 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2910 !!(wr->access & IB_ACCESS_REMOTE_READ));
2911 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2912 !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2913 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2914 !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2915 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2916 SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2917 !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2918 fwqe2->fmr_ctrl = 0;
2920 SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2921 ilog2(mr->ibmr.page_size) - 12);
2923 fwqe2->length_hi = 0;
2924 fwqe2->length_lo = mr->ibmr.length;
2925 fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2926 fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2928 qp->wqe_wr_id[qp->sq.prod].mr = mr;
2930 return 0;
2933 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2935 switch (opcode) {
2936 case IB_WR_RDMA_WRITE:
2937 case IB_WR_RDMA_WRITE_WITH_IMM:
2938 return IB_WC_RDMA_WRITE;
2939 case IB_WR_SEND_WITH_IMM:
2940 case IB_WR_SEND:
2941 case IB_WR_SEND_WITH_INV:
2942 return IB_WC_SEND;
2943 case IB_WR_RDMA_READ:
2944 case IB_WR_RDMA_READ_WITH_INV:
2945 return IB_WC_RDMA_READ;
2946 case IB_WR_ATOMIC_CMP_AND_SWP:
2947 return IB_WC_COMP_SWAP;
2948 case IB_WR_ATOMIC_FETCH_AND_ADD:
2949 return IB_WC_FETCH_ADD;
2950 case IB_WR_REG_MR:
2951 return IB_WC_REG_MR;
2952 case IB_WR_LOCAL_INV:
2953 return IB_WC_LOCAL_INV;
2954 default:
2955 return IB_WC_SEND;
2959 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2961 int wq_is_full, err_wr, pbl_is_full;
2962 struct qedr_dev *dev = qp->dev;
2964 /* prevent SQ overflow and/or processing of a bad WR */
2965 err_wr = wr->num_sge > qp->sq.max_sges;
2966 wq_is_full = qedr_wq_is_full(&qp->sq);
2967 pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2968 QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2969 if (wq_is_full || err_wr || pbl_is_full) {
2970 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2971 DP_ERR(dev,
2972 "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2973 qp);
2974 qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2977 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2978 DP_ERR(dev,
2979 "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2980 qp);
2981 qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2984 if (pbl_is_full &&
2985 !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2986 DP_ERR(dev,
2987 "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2988 qp);
2989 qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2991 return false;
2993 return true;
2996 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2997 struct ib_send_wr **bad_wr)
2999 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3000 struct qedr_qp *qp = get_qedr_qp(ibqp);
3001 struct rdma_sq_atomic_wqe_1st *awqe1;
3002 struct rdma_sq_atomic_wqe_2nd *awqe2;
3003 struct rdma_sq_atomic_wqe_3rd *awqe3;
3004 struct rdma_sq_send_wqe_2st *swqe2;
3005 struct rdma_sq_local_inv_wqe *iwqe;
3006 struct rdma_sq_rdma_wqe_2nd *rwqe2;
3007 struct rdma_sq_send_wqe_1st *swqe;
3008 struct rdma_sq_rdma_wqe_1st *rwqe;
3009 struct rdma_sq_fmr_wqe_1st *fwqe1;
3010 struct rdma_sq_common_wqe *wqe;
3011 u32 length;
3012 int rc = 0;
3013 bool comp;
3015 if (!qedr_can_post_send(qp, wr)) {
3016 *bad_wr = wr;
3017 return -ENOMEM;
3020 wqe = qed_chain_produce(&qp->sq.pbl);
3021 qp->wqe_wr_id[qp->sq.prod].signaled =
3022 !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3024 wqe->flags = 0;
3025 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3026 !!(wr->send_flags & IB_SEND_SOLICITED));
3027 comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3028 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3029 SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3030 !!(wr->send_flags & IB_SEND_FENCE));
3031 wqe->prev_wqe_size = qp->prev_wqe_size;
3033 qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3035 switch (wr->opcode) {
3036 case IB_WR_SEND_WITH_IMM:
3037 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3038 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3039 swqe->wqe_size = 2;
3040 swqe2 = qed_chain_produce(&qp->sq.pbl);
3042 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3043 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3044 wr, bad_wr);
3045 swqe->length = cpu_to_le32(length);
3046 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3047 qp->prev_wqe_size = swqe->wqe_size;
3048 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3049 break;
3050 case IB_WR_SEND:
3051 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3052 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3054 swqe->wqe_size = 2;
3055 swqe2 = qed_chain_produce(&qp->sq.pbl);
3056 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3057 wr, bad_wr);
3058 swqe->length = cpu_to_le32(length);
3059 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3060 qp->prev_wqe_size = swqe->wqe_size;
3061 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3062 break;
3063 case IB_WR_SEND_WITH_INV:
3064 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3065 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3066 swqe2 = qed_chain_produce(&qp->sq.pbl);
3067 swqe->wqe_size = 2;
3068 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3069 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3070 wr, bad_wr);
3071 swqe->length = cpu_to_le32(length);
3072 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3073 qp->prev_wqe_size = swqe->wqe_size;
3074 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3075 break;
3077 case IB_WR_RDMA_WRITE_WITH_IMM:
3078 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3079 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3081 rwqe->wqe_size = 2;
3082 rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3083 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3084 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3085 wr, bad_wr);
3086 rwqe->length = cpu_to_le32(length);
3087 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3088 qp->prev_wqe_size = rwqe->wqe_size;
3089 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3090 break;
3091 case IB_WR_RDMA_WRITE:
3092 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3093 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3095 rwqe->wqe_size = 2;
3096 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3097 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3098 wr, bad_wr);
3099 rwqe->length = cpu_to_le32(length);
3100 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3101 qp->prev_wqe_size = rwqe->wqe_size;
3102 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3103 break;
3104 case IB_WR_RDMA_READ_WITH_INV:
3105 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3106 /* fallthrough -- same is identical to RDMA READ */
3108 case IB_WR_RDMA_READ:
3109 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3110 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3112 rwqe->wqe_size = 2;
3113 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3114 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3115 wr, bad_wr);
3116 rwqe->length = cpu_to_le32(length);
3117 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3118 qp->prev_wqe_size = rwqe->wqe_size;
3119 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3120 break;
3122 case IB_WR_ATOMIC_CMP_AND_SWP:
3123 case IB_WR_ATOMIC_FETCH_AND_ADD:
3124 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3125 awqe1->wqe_size = 4;
3127 awqe2 = qed_chain_produce(&qp->sq.pbl);
3128 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3129 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3131 awqe3 = qed_chain_produce(&qp->sq.pbl);
3133 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3134 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3135 DMA_REGPAIR_LE(awqe3->swap_data,
3136 atomic_wr(wr)->compare_add);
3137 } else {
3138 wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3139 DMA_REGPAIR_LE(awqe3->swap_data,
3140 atomic_wr(wr)->swap);
3141 DMA_REGPAIR_LE(awqe3->cmp_data,
3142 atomic_wr(wr)->compare_add);
3145 qedr_prepare_sq_sges(qp, NULL, wr);
3147 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3148 qp->prev_wqe_size = awqe1->wqe_size;
3149 break;
3151 case IB_WR_LOCAL_INV:
3152 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3153 iwqe->wqe_size = 1;
3155 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3156 iwqe->inv_l_key = wr->ex.invalidate_rkey;
3157 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3158 qp->prev_wqe_size = iwqe->wqe_size;
3159 break;
3160 case IB_WR_REG_MR:
3161 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3162 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3163 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3164 fwqe1->wqe_size = 2;
3166 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3167 if (rc) {
3168 DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3169 *bad_wr = wr;
3170 break;
3173 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3174 qp->prev_wqe_size = fwqe1->wqe_size;
3175 break;
3176 default:
3177 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3178 rc = -EINVAL;
3179 *bad_wr = wr;
3180 break;
3183 if (*bad_wr) {
3184 u16 value;
3186 /* Restore prod to its position before
3187 * this WR was processed
3189 value = le16_to_cpu(qp->sq.db_data.data.value);
3190 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3192 /* Restore prev_wqe_size */
3193 qp->prev_wqe_size = wqe->prev_wqe_size;
3194 rc = -EINVAL;
3195 DP_ERR(dev, "POST SEND FAILED\n");
3198 return rc;
3201 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3202 struct ib_send_wr **bad_wr)
3204 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3205 struct qedr_qp *qp = get_qedr_qp(ibqp);
3206 unsigned long flags;
3207 int rc = 0;
3209 *bad_wr = NULL;
3211 if (qp->qp_type == IB_QPT_GSI)
3212 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3214 spin_lock_irqsave(&qp->q_lock, flags);
3216 if (rdma_protocol_roce(&dev->ibdev, 1)) {
3217 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3218 (qp->state != QED_ROCE_QP_STATE_ERR) &&
3219 (qp->state != QED_ROCE_QP_STATE_SQD)) {
3220 spin_unlock_irqrestore(&qp->q_lock, flags);
3221 *bad_wr = wr;
3222 DP_DEBUG(dev, QEDR_MSG_CQ,
3223 "QP in wrong state! QP icid=0x%x state %d\n",
3224 qp->icid, qp->state);
3225 return -EINVAL;
3229 while (wr) {
3230 rc = __qedr_post_send(ibqp, wr, bad_wr);
3231 if (rc)
3232 break;
3234 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3236 qedr_inc_sw_prod(&qp->sq);
3238 qp->sq.db_data.data.value++;
3240 wr = wr->next;
3243 /* Trigger doorbell
3244 * If there was a failure in the first WR then it will be triggered in
3245 * vane. However this is not harmful (as long as the producer value is
3246 * unchanged). For performance reasons we avoid checking for this
3247 * redundant doorbell.
3249 wmb();
3250 writel(qp->sq.db_data.raw, qp->sq.db);
3252 /* Make sure write sticks */
3253 mmiowb();
3255 spin_unlock_irqrestore(&qp->q_lock, flags);
3257 return rc;
3260 int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3261 struct ib_recv_wr **bad_wr)
3263 struct qedr_qp *qp = get_qedr_qp(ibqp);
3264 struct qedr_dev *dev = qp->dev;
3265 unsigned long flags;
3266 int status = 0;
3268 if (qp->qp_type == IB_QPT_GSI)
3269 return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3271 spin_lock_irqsave(&qp->q_lock, flags);
3273 if (qp->state == QED_ROCE_QP_STATE_RESET) {
3274 spin_unlock_irqrestore(&qp->q_lock, flags);
3275 *bad_wr = wr;
3276 return -EINVAL;
3279 while (wr) {
3280 int i;
3282 if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3283 QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3284 wr->num_sge > qp->rq.max_sges) {
3285 DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n",
3286 qed_chain_get_elem_left_u32(&qp->rq.pbl),
3287 QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3288 qp->rq.max_sges);
3289 status = -ENOMEM;
3290 *bad_wr = wr;
3291 break;
3293 for (i = 0; i < wr->num_sge; i++) {
3294 u32 flags = 0;
3295 struct rdma_rq_sge *rqe =
3296 qed_chain_produce(&qp->rq.pbl);
3298 /* First one must include the number
3299 * of SGE in the list
3301 if (!i)
3302 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3303 wr->num_sge);
3305 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
3306 wr->sg_list[i].lkey);
3308 RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3309 wr->sg_list[i].length, flags);
3312 /* Special case of no sges. FW requires between 1-4 sges...
3313 * in this case we need to post 1 sge with length zero. this is
3314 * because rdma write with immediate consumes an RQ.
3316 if (!wr->num_sge) {
3317 u32 flags = 0;
3318 struct rdma_rq_sge *rqe =
3319 qed_chain_produce(&qp->rq.pbl);
3321 /* First one must include the number
3322 * of SGE in the list
3324 SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
3325 SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3327 RQ_SGE_SET(rqe, 0, 0, flags);
3328 i = 1;
3331 qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3332 qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3334 qedr_inc_sw_prod(&qp->rq);
3336 /* Flush all the writes before signalling doorbell */
3337 wmb();
3339 qp->rq.db_data.data.value++;
3341 writel(qp->rq.db_data.raw, qp->rq.db);
3343 /* Make sure write sticks */
3344 mmiowb();
3346 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3347 writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3348 mmiowb(); /* for second doorbell */
3351 wr = wr->next;
3354 spin_unlock_irqrestore(&qp->q_lock, flags);
3356 return status;
3359 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3361 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3363 return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3364 cq->pbl_toggle;
3367 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3369 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3370 struct qedr_qp *qp;
3372 qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3373 resp_cqe->qp_handle.lo,
3374 u64);
3375 return qp;
3378 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3380 struct rdma_cqe_requester *resp_cqe = &cqe->req;
3382 return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3385 /* Return latest CQE (needs processing) */
3386 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3388 return cq->latest_cqe;
3391 /* In fmr we need to increase the number of fmr completed counter for the fmr
3392 * algorithm determining whether we can free a pbl or not.
3393 * we need to perform this whether the work request was signaled or not. for
3394 * this purpose we call this function from the condition that checks if a wr
3395 * should be skipped, to make sure we don't miss it ( possibly this fmr
3396 * operation was not signalted)
3398 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3400 if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3401 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3404 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3405 struct qedr_cq *cq, int num_entries,
3406 struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3407 int force)
3409 u16 cnt = 0;
3411 while (num_entries && qp->sq.wqe_cons != hw_cons) {
3412 if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3413 qedr_chk_if_fmr(qp);
3414 /* skip WC */
3415 goto next_cqe;
3418 /* fill WC */
3419 wc->status = status;
3420 wc->vendor_err = 0;
3421 wc->wc_flags = 0;
3422 wc->src_qp = qp->id;
3423 wc->qp = &qp->ibqp;
3425 wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3426 wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3428 switch (wc->opcode) {
3429 case IB_WC_RDMA_WRITE:
3430 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3431 break;
3432 case IB_WC_COMP_SWAP:
3433 case IB_WC_FETCH_ADD:
3434 wc->byte_len = 8;
3435 break;
3436 case IB_WC_REG_MR:
3437 qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3438 break;
3439 case IB_WC_RDMA_READ:
3440 case IB_WC_SEND:
3441 wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3442 break;
3443 default:
3444 break;
3447 num_entries--;
3448 wc++;
3449 cnt++;
3450 next_cqe:
3451 while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3452 qed_chain_consume(&qp->sq.pbl);
3453 qedr_inc_sw_cons(&qp->sq);
3456 return cnt;
3459 static int qedr_poll_cq_req(struct qedr_dev *dev,
3460 struct qedr_qp *qp, struct qedr_cq *cq,
3461 int num_entries, struct ib_wc *wc,
3462 struct rdma_cqe_requester *req)
3464 int cnt = 0;
3466 switch (req->status) {
3467 case RDMA_CQE_REQ_STS_OK:
3468 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3469 IB_WC_SUCCESS, 0);
3470 break;
3471 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3472 if (qp->state != QED_ROCE_QP_STATE_ERR)
3473 DP_DEBUG(dev, QEDR_MSG_CQ,
3474 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3475 cq->icid, qp->icid);
3476 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3477 IB_WC_WR_FLUSH_ERR, 1);
3478 break;
3479 default:
3480 /* process all WQE before the cosumer */
3481 qp->state = QED_ROCE_QP_STATE_ERR;
3482 cnt = process_req(dev, qp, cq, num_entries, wc,
3483 req->sq_cons - 1, IB_WC_SUCCESS, 0);
3484 wc += cnt;
3485 /* if we have extra WC fill it with actual error info */
3486 if (cnt < num_entries) {
3487 enum ib_wc_status wc_status;
3489 switch (req->status) {
3490 case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3491 DP_ERR(dev,
3492 "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3493 cq->icid, qp->icid);
3494 wc_status = IB_WC_BAD_RESP_ERR;
3495 break;
3496 case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3497 DP_ERR(dev,
3498 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3499 cq->icid, qp->icid);
3500 wc_status = IB_WC_LOC_LEN_ERR;
3501 break;
3502 case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3503 DP_ERR(dev,
3504 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3505 cq->icid, qp->icid);
3506 wc_status = IB_WC_LOC_QP_OP_ERR;
3507 break;
3508 case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3509 DP_ERR(dev,
3510 "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3511 cq->icid, qp->icid);
3512 wc_status = IB_WC_LOC_PROT_ERR;
3513 break;
3514 case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3515 DP_ERR(dev,
3516 "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3517 cq->icid, qp->icid);
3518 wc_status = IB_WC_MW_BIND_ERR;
3519 break;
3520 case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3521 DP_ERR(dev,
3522 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3523 cq->icid, qp->icid);
3524 wc_status = IB_WC_REM_INV_REQ_ERR;
3525 break;
3526 case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3527 DP_ERR(dev,
3528 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3529 cq->icid, qp->icid);
3530 wc_status = IB_WC_REM_ACCESS_ERR;
3531 break;
3532 case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3533 DP_ERR(dev,
3534 "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3535 cq->icid, qp->icid);
3536 wc_status = IB_WC_REM_OP_ERR;
3537 break;
3538 case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3539 DP_ERR(dev,
3540 "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3541 cq->icid, qp->icid);
3542 wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3543 break;
3544 case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3545 DP_ERR(dev,
3546 "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3547 cq->icid, qp->icid);
3548 wc_status = IB_WC_RETRY_EXC_ERR;
3549 break;
3550 default:
3551 DP_ERR(dev,
3552 "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3553 cq->icid, qp->icid);
3554 wc_status = IB_WC_GENERAL_ERR;
3556 cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3557 wc_status, 1);
3561 return cnt;
3564 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3566 switch (status) {
3567 case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3568 return IB_WC_LOC_ACCESS_ERR;
3569 case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3570 return IB_WC_LOC_LEN_ERR;
3571 case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3572 return IB_WC_LOC_QP_OP_ERR;
3573 case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3574 return IB_WC_LOC_PROT_ERR;
3575 case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3576 return IB_WC_MW_BIND_ERR;
3577 case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3578 return IB_WC_REM_INV_RD_REQ_ERR;
3579 case RDMA_CQE_RESP_STS_OK:
3580 return IB_WC_SUCCESS;
3581 default:
3582 return IB_WC_GENERAL_ERR;
3586 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3587 struct ib_wc *wc)
3589 wc->status = IB_WC_SUCCESS;
3590 wc->byte_len = le32_to_cpu(resp->length);
3592 if (resp->flags & QEDR_RESP_IMM) {
3593 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3594 wc->wc_flags |= IB_WC_WITH_IMM;
3596 if (resp->flags & QEDR_RESP_RDMA)
3597 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3599 if (resp->flags & QEDR_RESP_INV)
3600 return -EINVAL;
3602 } else if (resp->flags & QEDR_RESP_INV) {
3603 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3604 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3606 if (resp->flags & QEDR_RESP_RDMA)
3607 return -EINVAL;
3609 } else if (resp->flags & QEDR_RESP_RDMA) {
3610 return -EINVAL;
3613 return 0;
3616 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3617 struct qedr_cq *cq, struct ib_wc *wc,
3618 struct rdma_cqe_responder *resp, u64 wr_id)
3620 /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3621 wc->opcode = IB_WC_RECV;
3622 wc->wc_flags = 0;
3624 if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3625 if (qedr_set_ok_cqe_resp_wc(resp, wc))
3626 DP_ERR(dev,
3627 "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3628 cq, cq->icid, resp->flags);
3630 } else {
3631 wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3632 if (wc->status == IB_WC_GENERAL_ERR)
3633 DP_ERR(dev,
3634 "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3635 cq, cq->icid, resp->status);
3638 /* Fill the rest of the WC */
3639 wc->vendor_err = 0;
3640 wc->src_qp = qp->id;
3641 wc->qp = &qp->ibqp;
3642 wc->wr_id = wr_id;
3645 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3646 struct qedr_cq *cq, struct ib_wc *wc,
3647 struct rdma_cqe_responder *resp)
3649 u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3651 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
3653 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3654 qed_chain_consume(&qp->rq.pbl);
3655 qedr_inc_sw_cons(&qp->rq);
3657 return 1;
3660 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3661 int num_entries, struct ib_wc *wc, u16 hw_cons)
3663 u16 cnt = 0;
3665 while (num_entries && qp->rq.wqe_cons != hw_cons) {
3666 /* fill WC */
3667 wc->status = IB_WC_WR_FLUSH_ERR;
3668 wc->vendor_err = 0;
3669 wc->wc_flags = 0;
3670 wc->src_qp = qp->id;
3671 wc->byte_len = 0;
3672 wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3673 wc->qp = &qp->ibqp;
3674 num_entries--;
3675 wc++;
3676 cnt++;
3677 while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3678 qed_chain_consume(&qp->rq.pbl);
3679 qedr_inc_sw_cons(&qp->rq);
3682 return cnt;
3685 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3686 struct rdma_cqe_responder *resp, int *update)
3688 if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
3689 consume_cqe(cq);
3690 *update |= 1;
3694 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3695 struct qedr_cq *cq, int num_entries,
3696 struct ib_wc *wc, struct rdma_cqe_responder *resp,
3697 int *update)
3699 int cnt;
3701 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3702 cnt = process_resp_flush(qp, cq, num_entries, wc,
3703 resp->rq_cons);
3704 try_consume_resp_cqe(cq, qp, resp, update);
3705 } else {
3706 cnt = process_resp_one(dev, qp, cq, wc, resp);
3707 consume_cqe(cq);
3708 *update |= 1;
3711 return cnt;
3714 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3715 struct rdma_cqe_requester *req, int *update)
3717 if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
3718 consume_cqe(cq);
3719 *update |= 1;
3723 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3725 struct qedr_dev *dev = get_qedr_dev(ibcq->device);
3726 struct qedr_cq *cq = get_qedr_cq(ibcq);
3727 union rdma_cqe *cqe = cq->latest_cqe;
3728 u32 old_cons, new_cons;
3729 unsigned long flags;
3730 int update = 0;
3731 int done = 0;
3733 if (cq->destroyed) {
3734 DP_ERR(dev,
3735 "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
3736 cq, cq->icid);
3737 return 0;
3740 if (cq->cq_type == QEDR_CQ_TYPE_GSI)
3741 return qedr_gsi_poll_cq(ibcq, num_entries, wc);
3743 spin_lock_irqsave(&cq->cq_lock, flags);
3744 old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3745 while (num_entries && is_valid_cqe(cq, cqe)) {
3746 struct qedr_qp *qp;
3747 int cnt = 0;
3749 /* prevent speculative reads of any field of CQE */
3750 rmb();
3752 qp = cqe_get_qp(cqe);
3753 if (!qp) {
3754 WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
3755 break;
3758 wc->qp = &qp->ibqp;
3760 switch (cqe_get_type(cqe)) {
3761 case RDMA_CQE_TYPE_REQUESTER:
3762 cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
3763 &cqe->req);
3764 try_consume_req_cqe(cq, qp, &cqe->req, &update);
3765 break;
3766 case RDMA_CQE_TYPE_RESPONDER_RQ:
3767 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3768 &cqe->resp, &update);
3769 break;
3770 case RDMA_CQE_TYPE_INVALID:
3771 default:
3772 DP_ERR(dev, "Error: invalid CQE type = %d\n",
3773 cqe_get_type(cqe));
3775 num_entries -= cnt;
3776 wc += cnt;
3777 done += cnt;
3779 cqe = get_cqe(cq);
3781 new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
3783 cq->cq_cons += new_cons - old_cons;
3785 if (update)
3786 /* doorbell notifies abount latest VALID entry,
3787 * but chain already point to the next INVALID one
3789 doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
3791 spin_unlock_irqrestore(&cq->cq_lock, flags);
3792 return done;
3795 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
3796 u8 port_num,
3797 const struct ib_wc *in_wc,
3798 const struct ib_grh *in_grh,
3799 const struct ib_mad_hdr *mad_hdr,
3800 size_t in_mad_size, struct ib_mad_hdr *out_mad,
3801 size_t *out_mad_size, u16 *out_mad_pkey_index)
3803 struct qedr_dev *dev = get_qedr_dev(ibdev);
3805 DP_DEBUG(dev, QEDR_MSG_GSI,
3806 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
3807 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
3808 mad_hdr->class_specific, mad_hdr->class_version,
3809 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
3810 return IB_MAD_RESULT_SUCCESS;