/*******************************************************************
 * This file is part of the Emulex RoCE Device Driver for          *
 * RoCE (RDMA over Converged Ethernet) adapters.                   *
 * Copyright (C) 2008-2012 Emulex. All rights reserved.            *
 * EMULEX and SLI are trademarks of Emulex.                        *
 *                                                                  *
 * This program is free software; you can redistribute it and/or   *
 * modify it under the terms of version 2 of the GNU General       *
 * Public License as published by the Free Software Foundation.    *
 * This program is distributed in the hope that it will be useful. *
 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
 * TO BE LEGALLY INVALID. See the GNU General Public License for   *
 * more details, a copy of which can be found in the file COPYING  *
 * included with this package.                                     *
 *
 * Contact Information:
 * linux-drivers@emulex.com
 *
 * Costa Mesa, CA 92626
 *******************************************************************/
#include <linux/dma-mapping.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>

#include "ocrdma.h"
#include "ocrdma_hw.h"
#include "ocrdma_verbs.h"
#include "ocrdma_abi.h"
int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index > 0)
		return -EINVAL;

	*pkey = 0xffff;
	return 0;
}
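/* Return the GID at @index from the adapter's software GID table. */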
int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
		     int index, union ib_gid *sgid)
{
	struct ocrdma_dev *dev;

	dev = get_ocrdma_dev(ibdev);
	memset(sgid, 0, sizeof(*sgid));
	if (index >= OCRDMA_MAX_SGID)
		return -EINVAL;

	memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));

	return 0;
}
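/* Report device capabilities to the IB core, using the attributes read
 * from the adapter at probe time.
 */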
int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
{
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);

	memset(attr, 0, sizeof *attr);
	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
	attr->max_mr_size = ~0ull;
	attr->page_size_cap = 0xffff000;
	attr->vendor_id = dev->nic_info.pdev->vendor;
	attr->vendor_part_id = dev->nic_info.pdev->device;
	attr->max_qp = dev->attr.max_qp;
	attr->max_ah = OCRDMA_MAX_AH;
	attr->max_qp_wr = dev->attr.max_wqe;
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
					IB_DEVICE_RC_RNR_NAK_GEN |
					IB_DEVICE_SHUTDOWN_PORT |
					IB_DEVICE_SYS_IMAGE_GUID |
					IB_DEVICE_LOCAL_DMA_LKEY |
					IB_DEVICE_MEM_MGT_EXTENSIONS;
	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
	attr->max_cq = dev->attr.max_cq;
	attr->max_cqe = dev->attr.max_cqe;
	attr->max_mr = dev->attr.max_mr;
	attr->max_pd = dev->attr.max_pd;
	attr->max_map_per_fmr = 0;
	attr->max_qp_rd_atom =
	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
	attr->max_srq = dev->attr.max_srq;
	attr->max_srq_sge = dev->attr.max_srq_sge;
	attr->max_srq_wr = dev->attr.max_rqe;
	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
	attr->max_fast_reg_page_list_len = 0;
	return 0;
}
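/* Translate the PHY link speed reported by firmware into the IB
 * speed/width pair expected by ib_query_port().
 */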
static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
					    u8 *ib_speed, u8 *ib_width)
{
	int status;
	u8 speed;

	status = ocrdma_mbx_get_link_speed(dev, &speed);
	if (status)
		speed = OCRDMA_PHYS_LINK_SPEED_ZERO;

	switch (speed) {
	case OCRDMA_PHYS_LINK_SPEED_1GBPS:
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case OCRDMA_PHYS_LINK_SPEED_10GBPS:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case OCRDMA_PHYS_LINK_SPEED_20GBPS:
		*ib_speed = IB_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case OCRDMA_PHYS_LINK_SPEED_40GBPS:
		*ib_speed = IB_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	default:
		/* Unsupported */
		*ib_speed = IB_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}

int ocrdma_query_port(struct ib_device *ibdev,
		      u8 port, struct ib_port_attr *props)
{
	enum ib_port_state port_state;
	struct ocrdma_dev *dev;
	struct net_device *netdev;

	dev = get_ocrdma_dev(ibdev);
	if (port > 1) {
		pr_err("%s(%d) invalid_port=0x%x\n", __func__,
		       dev->id, port);
		return -EINVAL;
	}
	netdev = dev->nic_info.netdev;
	if (netif_running(netdev) && netif_oper_up(netdev)) {
		port_state = IB_PORT_ACTIVE;
		props->phys_state = 5;
	} else {
		port_state = IB_PORT_DOWN;
		props->phys_state = 3;
	}
	props->max_mtu = IB_MTU_4096;
	props->active_mtu = iboe_get_mtu(netdev->mtu);
	props->state = port_state;
	props->port_cap_flags =
	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS;
	props->gid_tbl_len = OCRDMA_MAX_SGID;
	props->pkey_tbl_len = 1;
	props->bad_pkey_cntr = 0;
	props->qkey_viol_cntr = 0;
	get_link_speed_and_width(dev, &props->active_speed,
				 &props->active_width);
	props->max_msg_sz = 0x80000000;
	props->max_vl_num = 4;
	return 0;
}

int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
		       struct ib_port_modify *props)
{
	struct ocrdma_dev *dev;

	dev = get_ocrdma_dev(ibdev);
	if (port > 1) {
		pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
		return -EINVAL;
	}
	return 0;
}
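/* Track (physical address, length) keys per user context; ocrdma_mmap()
 * only remaps regions that were previously registered here.
 */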
static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
			   unsigned long len)
{
	struct ocrdma_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (mm == NULL)
		return -ENOMEM;
	mm->key.phy_addr = phy_addr;
	mm->key.len = len;
	INIT_LIST_HEAD(&mm->entry);

	mutex_lock(&uctx->mm_list_lock);
	list_add_tail(&mm->entry, &uctx->mm_head);
	mutex_unlock(&uctx->mm_list_lock);
	return 0;
}

static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
			    unsigned long len)
{
	struct ocrdma_mm *mm, *tmp;

	mutex_lock(&uctx->mm_list_lock);
	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
			continue;

		list_del(&mm->entry);
		kfree(mm);
		break;
	}
	mutex_unlock(&uctx->mm_list_lock);
}

static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
			       unsigned long len)
{
	bool found = false;
	struct ocrdma_mm *mm;

	mutex_lock(&uctx->mm_list_lock);
	list_for_each_entry(mm, &uctx->mm_head, entry) {
		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
			continue;

		found = true;
		break;
	}
	mutex_unlock(&uctx->mm_list_lock);
	return found;
}

static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
					  struct ocrdma_ucontext *uctx,
					  struct ib_udata *udata)
{
	struct ocrdma_pd *pd = NULL;
	int status;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	if (udata && uctx) {
		pd->dpp_enabled =
			dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
		pd->num_dpp_qp =
			pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
	}

retry:
	status = ocrdma_mbx_alloc_pd(dev, pd);
	if (status) {
		if (pd->dpp_enabled) {
			pd->dpp_enabled = false;
			pd->num_dpp_qp = 0;
			goto retry;
		}
		kfree(pd);
		return ERR_PTR(status);
	}

	return pd;
}

static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
				 struct ocrdma_pd *pd)
{
	return (uctx->cntxt_pd == pd ? true : false);
}

static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
			      struct ocrdma_pd *pd)
{
	int status;

	status = ocrdma_mbx_dealloc_pd(dev, pd);
	kfree(pd);
	return status;
}

static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
				    struct ocrdma_ucontext *uctx,
				    struct ib_udata *udata)
{
	int status = 0;

	uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
	if (IS_ERR(uctx->cntxt_pd)) {
		status = PTR_ERR(uctx->cntxt_pd);
		uctx->cntxt_pd = NULL;
		goto err;
	}

	uctx->cntxt_pd->uctx = uctx;
	uctx->cntxt_pd->ibpd.device = &dev->ibdev;
err:
	return status;
}

static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
{
	int status;
	struct ocrdma_pd *pd = uctx->cntxt_pd;
	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);

	BUG_ON(uctx->pd_in_use);
	uctx->cntxt_pd = NULL;
	status = _ocrdma_dealloc_pd(dev, pd);
	return status;
}

static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
{
	struct ocrdma_pd *pd = NULL;

	mutex_lock(&uctx->mm_list_lock);
	if (!uctx->pd_in_use) {
		uctx->pd_in_use = true;
		pd = uctx->cntxt_pd;
	}
	mutex_unlock(&uctx->mm_list_lock);

	return pd;
}

static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
{
	mutex_lock(&uctx->mm_list_lock);
	uctx->pd_in_use = false;
	mutex_unlock(&uctx->mm_list_lock);
}
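/* Allocate a user context: DMA memory for the AH table plus a dedicated
 * context PD, and return the mapping keys in the ABI response.
 */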
struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
					  struct ib_udata *udata)
{
	int status;
	struct ocrdma_ucontext *ctx;
	struct ocrdma_alloc_ucontext_resp resp;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
	struct pci_dev *pdev = dev->nic_info.pdev;
	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);

	if (!udata)
		return ERR_PTR(-EFAULT);
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->mm_head);
	mutex_init(&ctx->mm_list_lock);

	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
					    &ctx->ah_tbl.pa, GFP_KERNEL);
	if (!ctx->ah_tbl.va) {
		kfree(ctx);
		return ERR_PTR(-ENOMEM);
	}
	memset(ctx->ah_tbl.va, 0, map_len);
	ctx->ah_tbl.len = map_len;

	memset(&resp, 0, sizeof(resp));
	resp.ah_tbl_len = ctx->ah_tbl.len;
	resp.ah_tbl_page = ctx->ah_tbl.pa;

	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
	if (status)
		goto map_err;

	status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
	if (status)
		goto pd_err;

	resp.dev_id = dev->id;
	resp.max_inline_data = dev->attr.max_inline_data;
	resp.wqe_size = dev->attr.wqe_size;
	resp.rqe_size = dev->attr.rqe_size;
	resp.dpp_wqe_size = dev->attr.wqe_size;

	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
	if (status)
		goto cpy_err;
	return &ctx->ibucontext;

cpy_err:
pd_err:
	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
map_err:
	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
			  ctx->ah_tbl.pa);
	kfree(ctx);
	return ERR_PTR(status);
}

int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	int status;
	struct ocrdma_mm *mm, *tmp;
	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
	struct pci_dev *pdev = dev->nic_info.pdev;

	status = ocrdma_dealloc_ucontext_pd(uctx);

	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
			  uctx->ah_tbl.pa);

	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
		list_del(&mm->entry);
		kfree(mm);
	}
	kfree(uctx);
	return status;
}
int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
	struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
	unsigned long len = (vma->vm_end - vma->vm_start);
	int status = 0;
	bool found;

	if (vma->vm_start & (PAGE_SIZE - 1))
		return -EINVAL;
	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
	if (!found)
		return -EINVAL;

	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
		dev->nic_info.db_total_size)) &&
		(len <= dev->nic_info.db_page_size)) {
		if (vma->vm_flags & VM_READ)
			return -EPERM;

		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					    len, vma->vm_page_prot);
	} else if (dev->nic_info.dpp_unmapped_len &&
		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
			dev->nic_info.dpp_unmapped_len)) &&
		(len <= dev->nic_info.dpp_unmapped_len)) {
		if (vma->vm_flags & VM_READ)
			return -EPERM;

		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					    len, vma->vm_page_prot);
	} else {
		status = remap_pfn_range(vma, vma->vm_start,
					 vma->vm_pgoff, len, vma->vm_page_prot);
	}
	return status;
}
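/* Pass the doorbell page (and the DPP page, when DPP is enabled) of a PD
 * to user space and register both regions for later mmap().
 */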
static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
				struct ib_ucontext *ib_ctx,
				struct ib_udata *udata)
{
	int status;
	u64 db_page_addr;
	u64 dpp_page_addr = 0;
	u32 db_page_size;
	struct ocrdma_alloc_pd_uresp rsp;
	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);

	memset(&rsp, 0, sizeof(rsp));
	rsp.id = pd->id;
	rsp.dpp_enabled = pd->dpp_enabled;
	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
	db_page_size = dev->nic_info.db_page_size;

	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
	if (status)
		return status;

	if (pd->dpp_enabled) {
		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
				(pd->id * PAGE_SIZE);
		status = ocrdma_add_mmap(uctx, dpp_page_addr,
					 PAGE_SIZE);
		if (status)
			goto dpp_map_err;
		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
		rsp.dpp_page_addr_lo = dpp_page_addr;
	}

	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
	if (status)
		goto ucopy_err;
	pd->uctx = uctx;
	return 0;

ucopy_err:
	if (pd->dpp_enabled)
		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
dpp_map_err:
	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
	return status;
}

struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
			      struct ib_ucontext *context,
			      struct ib_udata *udata)
{
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
	struct ocrdma_pd *pd;
	struct ocrdma_ucontext *uctx = NULL;
	int status;
	u8 is_uctx_pd = false;

	if (udata && context) {
		uctx = get_ocrdma_ucontext(context);
		pd = ocrdma_get_ucontext_pd(uctx);
		if (pd) {
			is_uctx_pd = true;
			goto pd_mapping;
		}
	}

	pd = _ocrdma_alloc_pd(dev, uctx, udata);
	if (IS_ERR(pd)) {
		status = PTR_ERR(pd);
		goto exit;
	}

pd_mapping:
	if (udata && context) {
		status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
		if (status)
			goto err;
	}
	return &pd->ibpd;

err:
	if (is_uctx_pd) {
		ocrdma_release_ucontext_pd(uctx);
	} else {
		status = ocrdma_mbx_dealloc_pd(dev, pd);
		kfree(pd);
	}
exit:
	return ERR_PTR(status);
}

int ocrdma_dealloc_pd(struct ib_pd *ibpd)
{
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
	struct ocrdma_ucontext *uctx = NULL;
	int status;
	u64 usr_db;

	uctx = pd->uctx;
	if (uctx) {
		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
			(pd->id * PAGE_SIZE);
		if (pd->dpp_enabled)
			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
		usr_db = ocrdma_get_db_addr(dev, pd->id);
		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);

		if (is_ucontext_pd(uctx, pd)) {
			ocrdma_release_ucontext_pd(uctx);
			return 0;
		}
	}
	status = _ocrdma_dealloc_pd(dev, pd);
	return status;
}

static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
			     u32 pdid, int acc, u32 num_pbls, u32 addr_check)
{
	int status;

	mr->hwmr.local_rd = 1;
	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
	mr->hwmr.num_pbls = num_pbls;

	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
	if (status)
		return status;

	mr->ibmr.lkey = mr->hwmr.lkey;
	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
		mr->ibmr.rkey = mr->hwmr.lkey;
	return 0;
}

struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
{
	int status;
	struct ocrdma_mr *mr;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);

	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
		pr_err("%s err, invalid access rights\n", __func__);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
				   OCRDMA_ADDR_CHECK_DISABLE);
	if (status) {
		kfree(mr);
		return ERR_PTR(status);
	}

	return &mr->ibmr;
}
static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
				   struct ocrdma_hw_mr *mr)
{
	struct pci_dev *pdev = dev->nic_info.pdev;
	int i;

	if (mr->pbl_table) {
		for (i = 0; i < mr->num_pbls; i++) {
			if (!mr->pbl_table[i].va)
				continue;
			dma_free_coherent(&pdev->dev, mr->pbl_size,
					  mr->pbl_table[i].va,
					  mr->pbl_table[i].pa);
		}
		kfree(mr->pbl_table);
		mr->pbl_table = NULL;
	}
}

static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
			       u32 num_pbes)
{
	u32 num_pbls = 0;
	u32 idx = 0;
	int status = 0;
	u32 pbl_size;

	do {
		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
			status = -EFAULT;
			break;
		}
		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
		num_pbls = num_pbls / (pbl_size / sizeof(u64));
		idx++;
	} while (num_pbls >= dev->attr.max_num_mr_pbl);

	mr->hwmr.num_pbes = num_pbes;
	mr->hwmr.num_pbls = num_pbls;
	mr->hwmr.pbl_size = pbl_size;
	return status;
}

static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
{
	int status = 0;
	int i;
	u32 dma_len = mr->pbl_size;
	struct pci_dev *pdev = dev->nic_info.pdev;
	void *va;
	dma_addr_t pa;

	mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
				mr->num_pbls, GFP_KERNEL);
	if (!mr->pbl_table)
		return -ENOMEM;

	for (i = 0; i < mr->num_pbls; i++) {
		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
		if (!va) {
			ocrdma_free_mr_pbl_tbl(dev, mr);
			status = -ENOMEM;
			break;
		}
		memset(va, 0, dma_len);
		mr->pbl_table[i].va = va;
		mr->pbl_table[i].pa = pa;
	}
	return status;
}

static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
			    u32 num_pbes)
{
	struct ocrdma_pbe *pbe;
	struct ib_umem_chunk *chunk;
	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
	struct ib_umem *umem = mr->umem;
	int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;

	if (!mr->hwmr.num_pbes)
		return;

	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
	pbe_cnt = 0;

	shift = ilog2(umem->page_size);

	list_for_each_entry(chunk, &umem->chunk_list, list) {
		/* get all the dma regions from the chunk. */
		for (i = 0; i < chunk->nmap; i++) {
			pages = sg_dma_len(&chunk->page_list[i]) >> shift;
			for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
				/* store the page address in pbe */
				pbe->pa_lo =
				    cpu_to_le32(sg_dma_address
						(&chunk->page_list[i]) +
						(umem->page_size * pg_cnt));
				pbe->pa_hi =
				    cpu_to_le32(upper_32_bits
						((sg_dma_address
						  (&chunk->page_list[i]) +
						  umem->page_size * pg_cnt)));
				pbe_cnt += 1;
				total_num_pbes += 1;
				pbe++;

				/* if done building pbes, issue the mbx cmd. */
				if (total_num_pbes == num_pbes)
					return;

				/* if the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
					(mr->hwmr.pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct ocrdma_pbe *)pbl_tbl->va;
					pbe_cnt = 0;
				}
			}
		}
	}
}

struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
				 u64 usr_addr, int acc, struct ib_udata *udata)
{
	int status = -ENOMEM;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
	struct ocrdma_mr *mr;
	struct ocrdma_pd *pd;
	u32 num_pbes;

	pd = get_ocrdma_pd(ibpd);

	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(status);
	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
	if (IS_ERR(mr->umem)) {
		status = -EFAULT;
		goto umem_err;
	}
	num_pbes = ib_umem_page_count(mr->umem);
	status = ocrdma_get_pbl_info(dev, mr, num_pbes);
	if (status)
		goto umem_err;

	mr->hwmr.pbe_size = mr->umem->page_size;
	mr->hwmr.fbo = mr->umem->offset;
	mr->hwmr.va = usr_addr;
	mr->hwmr.len = len;
	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
	mr->hwmr.local_rd = 1;
	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
	if (status)
		goto umem_err;
	build_user_pbes(dev, mr, num_pbes);
	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
	if (status)
		goto mbx_err;
	mr->ibmr.lkey = mr->hwmr.lkey;
	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
		mr->ibmr.rkey = mr->hwmr.lkey;

	return &mr->ibmr;

mbx_err:
	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
umem_err:
	kfree(mr);
	return ERR_PTR(status);
}

int ocrdma_dereg_mr(struct ib_mr *ib_mr)
{
	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
	struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
	int status;

	status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);

	if (mr->hwmr.fr_mr == 0)
		ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);

	/* it could be user registered memory. */
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
	return status;
}
static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
				struct ib_udata *udata,
				struct ib_ucontext *ib_ctx)
{
	int status;
	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
	struct ocrdma_create_cq_uresp uresp;

	memset(&uresp, 0, sizeof(uresp));
	uresp.cq_id = cq->id;
	uresp.page_size = PAGE_ALIGN(cq->len);
	uresp.num_pages = 1;
	uresp.max_hw_cqe = cq->max_hw_cqe;
	uresp.page_addr[0] = cq->pa;
	uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
	uresp.db_page_size = dev->nic_info.db_page_size;
	uresp.phase_change = cq->phase_change ? 1 : 0;
	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (status) {
		pr_err("%s(%d) copy error cqid=0x%x.\n",
		       __func__, dev->id, cq->id);
		goto err;
	}
	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
	if (status)
		goto err;
	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
	if (status) {
		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
		goto err;
	}
	cq->ucontext = uctx;
err:
	return status;
}

struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
			       struct ib_ucontext *ib_ctx,
			       struct ib_udata *udata)
{
	struct ocrdma_cq *cq;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
	struct ocrdma_ucontext *uctx = NULL;
	u16 pd_id = 0;
	int status;
	struct ocrdma_create_cq_ureq ureq;

	if (udata) {
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
			return ERR_PTR(-EFAULT);
	} else {
		ureq.dpp_cq = 0;
	}
	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&cq->cq_lock);
	spin_lock_init(&cq->comp_handler_lock);
	INIT_LIST_HEAD(&cq->sq_head);
	INIT_LIST_HEAD(&cq->rq_head);

	if (ib_ctx) {
		uctx = get_ocrdma_ucontext(ib_ctx);
		pd_id = uctx->cntxt_pd->id;
	}

	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
	if (status) {
		kfree(cq);
		return ERR_PTR(status);
	}
	if (ib_ctx) {
		status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
		if (status)
			goto ctx_err;
	}
	cq->phase = OCRDMA_CQE_VALID;
	cq->arm_needed = true;
	dev->cq_tbl[cq->id] = cq;

	return &cq->ibcq;

ctx_err:
	ocrdma_mbx_destroy_cq(dev, cq);
	kfree(cq);
	return ERR_PTR(status);
}

int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
		     struct ib_udata *udata)
{
	int status = 0;
	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);

	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
		status = -EINVAL;
		return status;
	}
	ibcq->cqe = new_cnt;
	return status;
}

int ocrdma_destroy_cq(struct ib_cq *ibcq)
{
	int status;
	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
	int pdid = 0;

	status = ocrdma_mbx_destroy_cq(dev, cq);

	if (cq->ucontext) {
		pdid = cq->ucontext->cntxt_pd->id;
		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
				PAGE_ALIGN(cq->len));
		ocrdma_del_mmap(cq->ucontext,
				ocrdma_get_db_addr(dev, pdid),
				dev->nic_info.db_page_size);
	}
	dev->cq_tbl[cq->id] = NULL;
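
	kfree(cq);
	return status;
}

/* QPs are tracked by ID in dev->qp_tbl so that CQE processing can map a
 * qpn reported by the adapter back to its owning QP.
 */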
static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
{
	int status = -EINVAL;

	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
		dev->qp_tbl[qp->id] = qp;
		status = 0;
	}
	return status;
}

static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
{
	dev->qp_tbl[qp->id] = NULL;
}

static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
				  struct ib_qp_init_attr *attrs)
{
	if ((attrs->qp_type != IB_QPT_GSI) &&
	    (attrs->qp_type != IB_QPT_RC) &&
	    (attrs->qp_type != IB_QPT_UC) &&
	    (attrs->qp_type != IB_QPT_UD)) {
		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
		       __func__, dev->id, attrs->qp_type);
		return -EINVAL;
	}
	/* Skip the check for QP1 to support CM size of 128 */
	if ((attrs->qp_type != IB_QPT_GSI) &&
	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
		       __func__, dev->id, attrs->cap.max_send_wr);
		pr_err("%s(%d) supported send_wr=0x%x\n",
		       __func__, dev->id, dev->attr.max_wqe);
		return -EINVAL;
	}
	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
		       __func__, dev->id, attrs->cap.max_recv_wr);
		pr_err("%s(%d) supported recv_wr=0x%x\n",
		       __func__, dev->id, dev->attr.max_rqe);
		return -EINVAL;
	}
	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
		       __func__, dev->id, attrs->cap.max_inline_data);
		pr_err("%s(%d) supported inline data size=0x%x\n",
		       __func__, dev->id, dev->attr.max_inline_data);
		return -EINVAL;
	}
	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
		       __func__, dev->id, attrs->cap.max_send_sge);
		pr_err("%s(%d) supported send_sge=0x%x\n",
		       __func__, dev->id, dev->attr.max_send_sge);
		return -EINVAL;
	}
	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
		       __func__, dev->id, attrs->cap.max_recv_sge);
		pr_err("%s(%d) supported recv_sge=0x%x\n",
		       __func__, dev->id, dev->attr.max_recv_sge);
		return -EINVAL;
	}
	/* unprivileged user space cannot create special QP */
	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
		pr_err
		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
		     __func__, dev->id, attrs->qp_type);
		return -EINVAL;
	}
	/* allow creating only one GSI type of QP */
	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
		pr_err("%s(%d) GSI special QPs already created.\n",
		       __func__, dev->id);
		return -EINVAL;
	}
	/* verify consumer QPs are not trying to use GSI QP's CQ */
	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
			       __func__, dev->id);
			return -EINVAL;
		}
	}
	return 0;
}

static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
				struct ib_udata *udata, int dpp_offset,
				int dpp_credit_lmt, int srq)
{
	int status = 0;
	u64 usr_db;
	struct ocrdma_create_qp_uresp uresp;
	struct ocrdma_dev *dev = qp->dev;
	struct ocrdma_pd *pd = qp->pd;

	memset(&uresp, 0, sizeof(uresp));
	usr_db = dev->nic_info.unmapped_db +
			(pd->id * dev->nic_info.db_page_size);
	uresp.qp_id = qp->id;
	uresp.sq_dbid = qp->sq.dbid;
	uresp.num_sq_pages = 1;
	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
	uresp.sq_page_addr[0] = qp->sq.pa;
	uresp.num_wqe_allocated = qp->sq.max_cnt;
	if (!srq) {
		uresp.rq_dbid = qp->rq.dbid;
		uresp.num_rq_pages = 1;
		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
		uresp.rq_page_addr[0] = qp->rq.pa;
		uresp.num_rqe_allocated = qp->rq.max_cnt;
	}
	uresp.db_page_addr = usr_db;
	uresp.db_page_size = dev->nic_info.db_page_size;
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
		uresp.db_shift = 24;
	} else {
		uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
		uresp.db_shift = 16;
	}

	if (qp->dpp_enabled) {
		uresp.dpp_credit = dpp_credit_lmt;
		uresp.dpp_offset = dpp_offset;
	}
	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (status) {
		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
		goto err;
	}
	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
				 uresp.sq_page_size);
	if (status)
		goto err;

	if (!srq) {
		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
					 uresp.rq_page_size);
		if (status)
			goto rq_map_err;
	}
	return status;
rq_map_err:
	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
err:
	return status;
}
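/* Doorbell offsets within the PD's doorbell page differ between the GEN2
 * adapter family and earlier families.
 */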
static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
			     struct ocrdma_pd *pd)
{
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		qp->sq_db = dev->nic_info.db +
			(pd->id * dev->nic_info.db_page_size) +
			OCRDMA_DB_GEN2_SQ_OFFSET;
		qp->rq_db = dev->nic_info.db +
			(pd->id * dev->nic_info.db_page_size) +
			OCRDMA_DB_GEN2_RQ_OFFSET;
	} else {
		qp->sq_db = dev->nic_info.db +
			(pd->id * dev->nic_info.db_page_size) +
			OCRDMA_DB_SQ_OFFSET;
		qp->rq_db = dev->nic_info.db +
			(pd->id * dev->nic_info.db_page_size) +
			OCRDMA_DB_RQ_OFFSET;
	}
}

static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
{
	qp->wqe_wr_id_tbl =
	    kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
		    GFP_KERNEL);
	if (qp->wqe_wr_id_tbl == NULL)
		return -ENOMEM;
	qp->rqe_wr_id_tbl =
	    kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
	if (qp->rqe_wr_id_tbl == NULL)
		return -ENOMEM;

	return 0;
}

static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
				      struct ocrdma_pd *pd,
				      struct ib_qp_init_attr *attrs)
{
	qp->pd = pd;
	spin_lock_init(&qp->q_lock);
	INIT_LIST_HEAD(&qp->sq_entry);
	INIT_LIST_HEAD(&qp->rq_entry);

	qp->qp_type = attrs->qp_type;
	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
	qp->max_inline_data = attrs->cap.max_inline_data;
	qp->sq.max_sges = attrs->cap.max_send_sge;
	qp->rq.max_sges = attrs->cap.max_recv_sge;
	qp->state = OCRDMA_QPS_RST;
	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
}

static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
				   struct ib_qp_init_attr *attrs)
{
	if (attrs->qp_type == IB_QPT_GSI) {
		dev->gsi_qp_created = 1;
		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
	}
}

struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
			       struct ib_qp_init_attr *attrs,
			       struct ib_udata *udata)
{
	int status;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
	struct ocrdma_create_qp_ureq ureq;
	u16 dpp_credit_lmt, dpp_offset;

	status = ocrdma_check_qp_params(ibpd, dev, attrs);
	if (status)
		goto gen_err;

	memset(&ureq, 0, sizeof(ureq));
	if (udata) {
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
			return ERR_PTR(-EFAULT);
	}
	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		status = -ENOMEM;
		goto gen_err;
	}
	qp->dev = dev;
	ocrdma_set_qp_init_params(qp, pd, attrs);
	if (udata == NULL)
		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
				  OCRDMA_QP_FAST_REG);

	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
				      ureq.dpp_cq_id,
				      &dpp_offset, &dpp_credit_lmt);
	if (status)
		goto mbx_err;

	/* user space QP's wr_id table are managed in library */
	if (udata == NULL) {
		status = ocrdma_alloc_wr_id_tbl(qp);
		if (status)
			goto map_err;
	}

	status = ocrdma_add_qpn_map(dev, qp);
	if (status)
		goto map_err;
	ocrdma_set_qp_db(dev, qp, pd);
	if (udata) {
		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
					      dpp_credit_lmt,
					      (attrs->srq != NULL));
		if (status)
			goto cpy_err;
	}
	ocrdma_store_gsi_qp_cq(dev, attrs);
	qp->ibqp.qp_num = qp->id;
	mutex_unlock(&dev->dev_lock);
	return &qp->ibqp;

cpy_err:
	ocrdma_del_qpn_map(dev, qp);
map_err:
	ocrdma_mbx_destroy_qp(dev, qp);
mbx_err:
	mutex_unlock(&dev->dev_lock);
	kfree(qp->wqe_wr_id_tbl);
	kfree(qp->rqe_wr_id_tbl);
	kfree(qp);
	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
gen_err:
	return ERR_PTR(status);
}
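/* Ring the RQ doorbell for RQEs that were posted while the QP was still in
 * the INIT state (counted in qp->db_cache by ocrdma_ring_rq_db()).
 */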
static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
{
	if (qp->db_cache) {
		u32 val = qp->rq.dbid | (qp->db_cache <<
					 ocrdma_get_num_posted_shift(qp));
		iowrite32(val, qp->rq_db);
		qp->db_cache = 0;
	}
}

int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		      int attr_mask)
{
	int status = 0;
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev;
	enum ib_qp_state old_qps;

	qp = get_ocrdma_qp(ibqp);
	dev = qp->dev;
	if (attr_mask & IB_QP_STATE)
		status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
	/* if new and previous states are same hw doesn't need to
	 * know about it.
	 */
	if (status < 0)
		return status;
	status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
	if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
		ocrdma_flush_rq_db(qp);

	return status;
}

int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		     int attr_mask, struct ib_udata *udata)
{
	unsigned long flags;
	int status = -EINVAL;
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev;
	enum ib_qp_state old_qps, new_qps;

	qp = get_ocrdma_qp(ibqp);
	dev = qp->dev;

	/* synchronize with multiple contexts trying to change/retrieve qps */
	mutex_lock(&dev->dev_lock);
	/* synchronize with wqe, rqe posting and cqe processing contexts */
	spin_lock_irqsave(&qp->q_lock, flags);
	old_qps = get_ibqp_state(qp->state);
	if (attr_mask & IB_QP_STATE)
		new_qps = attr->qp_state;
	else
		new_qps = old_qps;
	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
				IB_LINK_LAYER_ETHERNET)) {
		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
		       old_qps, new_qps);
		goto param_err;
	}

	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);

param_err:
	mutex_unlock(&dev->dev_lock);
	return status;
}

static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
{
	switch (mtu) {
	case 256:
		return IB_MTU_256;
	case 512:
		return IB_MTU_512;
	case 1024:
		return IB_MTU_1024;
	case 2048:
		return IB_MTU_2048;
	case 4096:
		return IB_MTU_4096;
	default:
		return IB_MTU_1024;
	}
}

static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
{
	int ib_qp_acc_flags = 0;

	if (qp_cap_flags & OCRDMA_QP_INB_WR)
		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
	if (qp_cap_flags & OCRDMA_QP_INB_RD)
		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
	return ib_qp_acc_flags;
}

int ocrdma_query_qp(struct ib_qp *ibqp,
		    struct ib_qp_attr *qp_attr,
		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	int status;
	u32 qp_state;
	struct ocrdma_qp_params params;
	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
	struct ocrdma_dev *dev = qp->dev;

	memset(&params, 0, sizeof(params));
	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_query_qp(dev, qp, &params);
	mutex_unlock(&dev->dev_lock);
	if (status)
		return status;
	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
	qp_attr->path_mtu =
		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
				       OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
				       OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
	qp_attr->path_mig_state = IB_MIG_MIGRATED;
	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
	qp_attr->dest_qp_num =
	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;

	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
	qp_attr->cap.max_send_sge = qp->sq.max_sges;
	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
	qp_attr->cap.max_inline_data = qp->max_inline_data;
	qp_init_attr->cap = qp_attr->cap;
	memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
	       sizeof(params.dgid));
	qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
	    OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
	qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
					  OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
					 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
	qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
					      OCRDMA_QP_PARAMS_TCLASS_MASK) >>
					     OCRDMA_QP_PARAMS_TCLASS_SHIFT;

	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
	qp_attr->ah_attr.port_num = 1;
	qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
			       OCRDMA_QP_PARAMS_SL_MASK) >>
			      OCRDMA_QP_PARAMS_SL_SHIFT;
	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
			   OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
			     OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
	qp_attr->retry_cnt =
	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
	    OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
	qp_attr->min_rnr_timer = 0;
	qp_attr->pkey_index = 0;
	qp_attr->port_num = 1;
	qp_attr->ah_attr.src_path_bits = 0;
	qp_attr->ah_attr.static_rate = 0;
	qp_attr->alt_pkey_index = 0;
	qp_attr->alt_port_num = 0;
	qp_attr->alt_timeout = 0;
	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
		    OCRDMA_QP_PARAMS_STATE_SHIFT;
	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
	qp_attr->max_dest_rd_atomic =
	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
	qp_attr->max_rd_atomic =
	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
	return status;
}
static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
{
	int i = idx / 32;
	unsigned int mask = (1 << (idx % 32));

	if (srq->idx_bit_fields[i] & mask)
		srq->idx_bit_fields[i] &= ~mask;
	else
		srq->idx_bit_fields[i] |= mask;
}

static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
{
	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
}

static int is_hw_sq_empty(struct ocrdma_qp *qp)
{
	return (qp->sq.tail == qp->sq.head);
}

static int is_hw_rq_empty(struct ocrdma_qp *qp)
{
	return (qp->rq.tail == qp->rq.head);
}

static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
{
	return q->va + (q->head * q->entry_size);
}

static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
				      u32 idx)
{
	return q->va + (idx * q->entry_size);
}

static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
{
	q->head = (q->head + 1) & q->max_wqe_idx;
}

static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
{
	q->tail = (q->tail + 1) & q->max_wqe_idx;
}

/* discard the cqe for a given QP */
static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
{
	unsigned long cq_flags;
	unsigned long flags;
	int discard_cnt = 0;
	u32 cur_getp, stop_getp;
	struct ocrdma_cqe *cqe;
	u32 qpn = 0;

	spin_lock_irqsave(&cq->cq_lock, cq_flags);

	/* traverse through the CQEs in the hw CQ,
	 * find the matching CQE for a given qp,
	 * mark the matching one discarded by clearing qpn.
	 * ring the doorbell in the poll_cq() as
	 * we don't complete out of order cqe.
	 */

	cur_getp = cq->getp;
	/* find upto when do we reap the cq. */
	stop_getp = cur_getp;
	do {
		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
			break;

		cqe = cq->va + cur_getp;
		/* if (a) done reaping whole hw cq, or
		 * (b) qp_xq becomes empty.
		 * then exit
		 */
		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
		/* if previously discarded cqe found, skip that too. */
		/* check for matching qp */
		if (qpn == 0 || qpn != qp->id)
			goto skip_cqe;

		/* mark cqe discarded so that it is not picked up later
		 * in the poll_cq().
		 */
		discard_cnt += 1;
		cqe->cmn.qpn = 0;
		if (is_cqe_for_sq(cqe)) {
			ocrdma_hwq_inc_tail(&qp->sq);
		} else {
			if (qp->srq) {
				spin_lock_irqsave(&qp->srq->q_lock, flags);
				ocrdma_hwq_inc_tail(&qp->srq->rq);
				ocrdma_srq_toggle_bit(qp->srq, cur_getp);
				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
			} else {
				ocrdma_hwq_inc_tail(&qp->rq);
			}
		}
skip_cqe:
		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
	} while (cur_getp != stop_getp);
	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
}

void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
{
	int found = false;
	unsigned long flags;
	struct ocrdma_dev *dev = qp->dev;
	/* sync with any active CQ poll */

	spin_lock_irqsave(&dev->flush_q_lock, flags);
	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
	if (found)
		list_del(&qp->sq_entry);
	if (!qp->srq) {
		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
		if (found)
			list_del(&qp->rq_entry);
	}
	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
}

int ocrdma_destroy_qp(struct ib_qp *ibqp)
{
	int status;
	struct ocrdma_pd *pd;
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev;
	struct ib_qp_attr attrs;
	int attr_mask = IB_QP_STATE;
	unsigned long flags;

	qp = get_ocrdma_qp(ibqp);
	dev = qp->dev;
	pd = qp->pd;

	attrs.qp_state = IB_QPS_ERR;

	/* change the QP state to ERROR */
	_ocrdma_modify_qp(ibqp, &attrs, attr_mask);

	/* ensure that CQEs for newly created QP (whose id may be same with
	 * one which just getting destroyed are same), don't get
	 * discarded until the old CQEs are discarded.
	 */
	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_destroy_qp(dev, qp);

	/*
	 * acquire CQ lock while destroy is in progress, in order to
	 * protect against processing in-flight CQEs for this QP.
	 */
	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
		spin_lock(&qp->rq_cq->cq_lock);

	ocrdma_del_qpn_map(dev, qp);

	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
		spin_unlock(&qp->rq_cq->cq_lock);
	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);

	ocrdma_discard_cqes(qp, qp->sq_cq);
	ocrdma_discard_cqes(qp, qp->rq_cq);

	mutex_unlock(&dev->dev_lock);

	if (pd->uctx) {
		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
				PAGE_ALIGN(qp->sq.len));
		ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
				PAGE_ALIGN(qp->rq.len));
	}

	ocrdma_del_flush_qp(qp);

	kfree(qp->wqe_wr_id_tbl);
	kfree(qp->rqe_wr_id_tbl);
	kfree(qp);
	return status;
}
static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
				 struct ib_udata *udata)
{
	int status;
	struct ocrdma_create_srq_uresp uresp;

	memset(&uresp, 0, sizeof(uresp));
	uresp.rq_dbid = srq->rq.dbid;
	uresp.num_rq_pages = 1;
	uresp.rq_page_addr[0] = srq->rq.pa;
	uresp.rq_page_size = srq->rq.len;
	uresp.db_page_addr = dev->nic_info.unmapped_db +
	    (srq->pd->id * dev->nic_info.db_page_size);
	uresp.db_page_size = dev->nic_info.db_page_size;
	uresp.num_rqe_allocated = srq->rq.max_cnt;
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
		uresp.db_shift = 24;
	} else {
		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
		uresp.db_shift = 16;
	}

	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (status)
		return status;
	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
				 uresp.rq_page_size);
	return status;
}

struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
				 struct ib_srq_init_attr *init_attr,
				 struct ib_udata *udata)
{
	int status = -ENOMEM;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
	struct ocrdma_srq *srq;

	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
		return ERR_PTR(-EINVAL);
	if (init_attr->attr.max_wr > dev->attr.max_rqe)
		return ERR_PTR(-EINVAL);

	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
	if (!srq)
		return ERR_PTR(status);

	spin_lock_init(&srq->q_lock);
	srq->pd = pd;
	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
	status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
	if (status)
		goto err;

	if (udata == NULL) {
		srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
					     GFP_KERNEL);
		if (srq->rqe_wr_id_tbl == NULL)
			goto arm_err;

		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
		    (srq->rq.max_cnt % 32 ? 1 : 0);
		srq->idx_bit_fields =
		    kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
		if (srq->idx_bit_fields == NULL)
			goto arm_err;
		memset(srq->idx_bit_fields, 0xff,
		       srq->bit_fields_len * sizeof(u32));
	}

	if (init_attr->attr.srq_limit) {
		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
		if (status)
			goto arm_err;
	}

	if (udata) {
		status = ocrdma_copy_srq_uresp(dev, srq, udata);
		if (status)
			goto arm_err;
	}

	return &srq->ibsrq;

arm_err:
	ocrdma_mbx_destroy_srq(dev, srq);
err:
	kfree(srq->rqe_wr_id_tbl);
	kfree(srq->idx_bit_fields);
	kfree(srq);
	return ERR_PTR(status);
}

int ocrdma_modify_srq(struct ib_srq *ibsrq,
		      struct ib_srq_attr *srq_attr,
		      enum ib_srq_attr_mask srq_attr_mask,
		      struct ib_udata *udata)
{
	int status;
	struct ocrdma_srq *srq;

	srq = get_ocrdma_srq(ibsrq);
	if (srq_attr_mask & IB_SRQ_MAX_WR)
		status = -EINVAL;
	else
		status = ocrdma_mbx_modify_srq(srq, srq_attr);
	return status;
}

int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	int status;
	struct ocrdma_srq *srq;

	srq = get_ocrdma_srq(ibsrq);
	status = ocrdma_mbx_query_srq(srq, srq_attr);
	return status;
}

int ocrdma_destroy_srq(struct ib_srq *ibsrq)
{
	int status;
	struct ocrdma_srq *srq;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);

	srq = get_ocrdma_srq(ibsrq);

	status = ocrdma_mbx_destroy_srq(dev, srq);

	if (srq->pd->uctx)
		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
				PAGE_ALIGN(srq->rq.len));

	kfree(srq->idx_bit_fields);
	kfree(srq->rqe_wr_id_tbl);
	kfree(srq);
	return status;
}
/* unprivileged verbs and their support functions. */
static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
				struct ocrdma_hdr_wqe *hdr,
				struct ib_send_wr *wr)
{
	struct ocrdma_ewqe_ud_hdr *ud_hdr =
		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
	struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);

	ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
	if (qp->qp_type == IB_QPT_GSI)
		ud_hdr->qkey = qp->qkey;
	else
		ud_hdr->qkey = wr->wr.ud.remote_qkey;
	ud_hdr->rsvd_ahid = ah->id;
}

static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
			      struct ocrdma_sge *sge, int num_sge,
			      struct ib_sge *sg_list)
{
	int i;

	for (i = 0; i < num_sge; i++) {
		sge[i].lrkey = sg_list[i].lkey;
		sge[i].addr_lo = sg_list[i].addr;
		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
		sge[i].len = sg_list[i].length;
		hdr->total_len += sg_list[i].length;
	}
	if (num_sge == 0)
		memset(sge, 0, sizeof(*sge));
}

static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
{
	uint32_t total_len = 0, i;

	for (i = 0; i < num_sge; i++)
		total_len += sg_list[i].length;
	return total_len;
}

static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
				    struct ocrdma_hdr_wqe *hdr,
				    struct ocrdma_sge *sge,
				    struct ib_send_wr *wr, u32 wqe_size)
{
	int i;
	char *dpp_addr;

	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
		hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
		if (unlikely(hdr->total_len > qp->max_inline_data)) {
			pr_err("%s() supported_len=0x%x,\n"
			       " unsupported len req=0x%x\n", __func__,
			       qp->max_inline_data, hdr->total_len);
			return -EINVAL;
		}
		dpp_addr = (char *)sge;
		for (i = 0; i < wr->num_sge; i++) {
			memcpy(dpp_addr,
			       (void *)(unsigned long)wr->sg_list[i].addr,
			       wr->sg_list[i].length);
			dpp_addr += wr->sg_list[i].length;
		}

		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
		if (0 == hdr->total_len)
			wqe_size += sizeof(struct ocrdma_sge);
		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
	} else {
		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
		if (wr->num_sge)
			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
		else
			wqe_size += sizeof(struct ocrdma_sge);
		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
	}
	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
	return 0;
}

static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
			     struct ib_send_wr *wr)
{
	int status;
	struct ocrdma_sge *sge;
	u32 wqe_size = sizeof(*hdr);

	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
		ocrdma_build_ud_hdr(qp, hdr, wr);
		sge = (struct ocrdma_sge *)(hdr + 2);
		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
	} else {
		sge = (struct ocrdma_sge *)(hdr + 1);
	}

	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
	return status;
}

static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
			      struct ib_send_wr *wr)
{
	int status;
	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
	struct ocrdma_sge *sge = ext_rw + 1;
	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);

	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
	if (status)
		return status;
	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
	ext_rw->lrkey = wr->wr.rdma.rkey;
	ext_rw->len = hdr->total_len;
	return 0;
}

static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
			      struct ib_send_wr *wr)
{
	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
	struct ocrdma_sge *sge = ext_rw + 1;
	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
			sizeof(struct ocrdma_hdr_wqe);

	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);

	ext_rw->addr_lo = wr->wr.rdma.remote_addr;
	ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
	ext_rw->lrkey = wr->wr.rdma.rkey;
	ext_rw->len = hdr->total_len;
}

static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
			    struct ocrdma_hw_mr *hwmr)
{
	int i;
	u64 buf_addr = 0;
	int num_pbes;
	struct ocrdma_pbe *pbe;

	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
	num_pbes = 0;

	/* go through the OS phy regions & fill hw pbe entries into pbls. */
	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
		/* number of pbes can be more for one OS buf, when
		 * buffers are of different sizes.
		 * split the ib_buf to one or more pbes.
		 */
		buf_addr = wr->wr.fast_reg.page_list->page_list[i];
		pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
		pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
		num_pbes += 1;
		pbe++;

		/* if the pbl is full storing the pbes,
		 * move to next pbl.
		 */
		if (num_pbes == (hwmr->pbl_size / sizeof(u64))) {
			pbl_tbl++;
			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
		}
	}
}

static int get_encoded_page_size(int pg_sz)
{
	/* Max size is 256M 4096 << 16 */
	int i = 0;

	for (; i < 17; i++)
		if (pg_sz == (4096 << i))
			break;
	return i;
}

static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
			   struct ib_send_wr *wr)
{
	u64 fbo;
	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
	struct ocrdma_mr *mr;
	u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);

	wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);

	if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
		return -EINVAL;

	hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);

	if (wr->wr.fast_reg.page_list_len == 0)
		BUG();
	if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
	if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
	if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
	hdr->lkey = wr->wr.fast_reg.rkey;
	hdr->total_len = wr->wr.fast_reg.length;

	fbo = wr->wr.fast_reg.iova_start -
	    (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);

	fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
	fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
	fast_reg->fbo_hi = upper_32_bits(fbo);
	fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
	fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
	fast_reg->size_sge =
		get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
	mr = (struct ocrdma_mr *) (unsigned long)
		qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
	build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
	return 0;
}
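/* SQ doorbell format: queue id in the low bits, number of newly posted
 * WQEs (always 1 here) starting at bit 16.
 */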
2018 static void ocrdma_ring_sq_db(struct ocrdma_qp
*qp
)
2020 u32 val
= qp
->sq
.dbid
| (1 << 16);
2022 iowrite32(val
, qp
->sq_db
);
2025 int ocrdma_post_send(struct ib_qp
*ibqp
, struct ib_send_wr
*wr
,
2026 struct ib_send_wr
**bad_wr
)
2029 struct ocrdma_qp
*qp
= get_ocrdma_qp(ibqp
);
2030 struct ocrdma_hdr_wqe
*hdr
;
2031 unsigned long flags
;
2033 spin_lock_irqsave(&qp
->q_lock
, flags
);
2034 if (qp
->state
!= OCRDMA_QPS_RTS
&& qp
->state
!= OCRDMA_QPS_SQD
) {
2035 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2041 if (ocrdma_hwq_free_cnt(&qp
->sq
) == 0 ||
2042 wr
->num_sge
> qp
->sq
.max_sges
) {
2047 hdr
= ocrdma_hwq_head(&qp
->sq
);
2049 if (wr
->send_flags
& IB_SEND_SIGNALED
|| qp
->signaled
)
2050 hdr
->cw
|= (OCRDMA_FLAG_SIG
<< OCRDMA_WQE_FLAGS_SHIFT
);
2051 if (wr
->send_flags
& IB_SEND_FENCE
)
2053 (OCRDMA_FLAG_FENCE_L
<< OCRDMA_WQE_FLAGS_SHIFT
);
2054 if (wr
->send_flags
& IB_SEND_SOLICITED
)
2056 (OCRDMA_FLAG_SOLICIT
<< OCRDMA_WQE_FLAGS_SHIFT
);
2058 switch (wr
->opcode
) {
2059 case IB_WR_SEND_WITH_IMM
:
2060 hdr
->cw
|= (OCRDMA_FLAG_IMM
<< OCRDMA_WQE_FLAGS_SHIFT
);
2061 hdr
->immdt
= ntohl(wr
->ex
.imm_data
);
2063 hdr
->cw
|= (OCRDMA_SEND
<< OCRDMA_WQE_OPCODE_SHIFT
);
2064 ocrdma_build_send(qp
, hdr
, wr
);
2066 case IB_WR_SEND_WITH_INV
:
2067 hdr
->cw
|= (OCRDMA_FLAG_INV
<< OCRDMA_WQE_FLAGS_SHIFT
);
2068 hdr
->cw
|= (OCRDMA_SEND
<< OCRDMA_WQE_OPCODE_SHIFT
);
2069 hdr
->lkey
= wr
->ex
.invalidate_rkey
;
2070 status
= ocrdma_build_send(qp
, hdr
, wr
);
2072 case IB_WR_RDMA_WRITE_WITH_IMM
:
2073 hdr
->cw
|= (OCRDMA_FLAG_IMM
<< OCRDMA_WQE_FLAGS_SHIFT
);
2074 hdr
->immdt
= ntohl(wr
->ex
.imm_data
);
2075 case IB_WR_RDMA_WRITE
:
2076 hdr
->cw
|= (OCRDMA_WRITE
<< OCRDMA_WQE_OPCODE_SHIFT
);
2077 status
= ocrdma_build_write(qp
, hdr
, wr
);
2079 case IB_WR_RDMA_READ_WITH_INV
:
2080 hdr
->cw
|= (OCRDMA_FLAG_INV
<< OCRDMA_WQE_FLAGS_SHIFT
);
2081 case IB_WR_RDMA_READ
:
2082 ocrdma_build_read(qp
, hdr
, wr
);
2084 case IB_WR_LOCAL_INV
:
2086 (OCRDMA_LKEY_INV
<< OCRDMA_WQE_OPCODE_SHIFT
);
2087 hdr
->cw
|= ((sizeof(struct ocrdma_hdr_wqe
) +
2088 sizeof(struct ocrdma_sge
)) /
2089 OCRDMA_WQE_STRIDE
) << OCRDMA_WQE_SIZE_SHIFT
;
2090 hdr
->lkey
= wr
->ex
.invalidate_rkey
;
2092 case IB_WR_FAST_REG_MR
:
2093 status
= ocrdma_build_fr(qp
, hdr
, wr
);
2103 if (wr
->send_flags
& IB_SEND_SIGNALED
|| qp
->signaled
)
2104 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].signaled
= 1;
2106 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].signaled
= 0;
2107 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].wrid
= wr
->wr_id
;
2108 ocrdma_cpu_to_le32(hdr
, ((hdr
->cw
>> OCRDMA_WQE_SIZE_SHIFT
) &
2109 OCRDMA_WQE_SIZE_MASK
) * OCRDMA_WQE_STRIDE
);
2110 /* make sure wqe is written before adapter can access it */
2112 /* inform hw to start processing it */
2113 ocrdma_ring_sq_db(qp
);
2115 /* update pointer, counter for next wr */
2116 ocrdma_hwq_inc_head(&qp
->sq
);
2119 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2123 static void ocrdma_ring_rq_db(struct ocrdma_qp
*qp
)
2125 u32 val
= qp
->rq
.dbid
| (1 << ocrdma_get_num_posted_shift(qp
));
2127 if (qp
->state
!= OCRDMA_QPS_INIT
)
2128 iowrite32(val
, qp
->rq_db
);
2133 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe
*rqe
, struct ib_recv_wr
*wr
,
2137 struct ocrdma_sge
*sge
;
2139 wqe_size
= (wr
->num_sge
* sizeof(*sge
)) + sizeof(*rqe
);
2141 wqe_size
= sizeof(*sge
) + sizeof(*rqe
);
2143 rqe
->cw
= ((wqe_size
/ OCRDMA_WQE_STRIDE
) <<
2144 OCRDMA_WQE_SIZE_SHIFT
);
2145 rqe
->cw
|= (OCRDMA_FLAG_SIG
<< OCRDMA_WQE_FLAGS_SHIFT
);
2146 rqe
->cw
|= (OCRDMA_TYPE_LKEY
<< OCRDMA_WQE_TYPE_SHIFT
);
2148 rqe
->rsvd_tag
= tag
;
2149 sge
= (struct ocrdma_sge
*)(rqe
+ 1);
2150 ocrdma_build_sges(rqe
, sge
, wr
->num_sge
, wr
->sg_list
);
2151 ocrdma_cpu_to_le32(rqe
, wqe_size
);
2154 int ocrdma_post_recv(struct ib_qp
*ibqp
, struct ib_recv_wr
*wr
,
2155 struct ib_recv_wr
**bad_wr
)
2158 unsigned long flags
;
2159 struct ocrdma_qp
*qp
= get_ocrdma_qp(ibqp
);
2160 struct ocrdma_hdr_wqe
*rqe
;
2162 spin_lock_irqsave(&qp
->q_lock
, flags
);
2163 if (qp
->state
== OCRDMA_QPS_RST
|| qp
->state
== OCRDMA_QPS_ERR
) {
2164 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2169 if (ocrdma_hwq_free_cnt(&qp
->rq
) == 0 ||
2170 wr
->num_sge
> qp
->rq
.max_sges
) {
2175 rqe
= ocrdma_hwq_head(&qp
->rq
);
2176 ocrdma_build_rqe(rqe
, wr
, 0);
2178 qp
->rqe_wr_id_tbl
[qp
->rq
.head
] = wr
->wr_id
;
2179 /* make sure rqe is written before adapter can access it */
2182 /* inform hw to start processing it */
2183 ocrdma_ring_rq_db(qp
);
2185 /* update pointer, counter for next wr */
2186 ocrdma_hwq_inc_head(&qp
->rq
);
2189 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2193 /* cqe for srq's rqe can potentially arrive out of order.
2194 * index gives the entry in the shadow table where to store
2195 * the wr_id. tag/index is returned in cqe to reference back
2198 static int ocrdma_srq_get_idx(struct ocrdma_srq
*srq
)
2203 for (row
= 0; row
< srq
->bit_fields_len
; row
++) {
2204 if (srq
->idx_bit_fields
[row
]) {
2205 indx
= ffs(srq
->idx_bit_fields
[row
]);
2206 indx
= (row
* 32) + (indx
- 1);
2207 if (indx
>= srq
->rq
.max_cnt
)
2209 ocrdma_srq_toggle_bit(srq
, indx
);
2214 if (row
== srq
->bit_fields_len
)
2219 static void ocrdma_ring_srq_db(struct ocrdma_srq
*srq
)
2221 u32 val
= srq
->rq
.dbid
| (1 << 16);
2223 iowrite32(val
, srq
->db
+ OCRDMA_DB_GEN2_SRQ_OFFSET
);
int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int status = 0;
	unsigned long flags;
	struct ocrdma_srq *srq;
	struct ocrdma_hdr_wqe *rqe;
	u16 tag;

	srq = get_ocrdma_srq(ibsrq);

	spin_lock_irqsave(&srq->q_lock, flags);
	while (wr) {
		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
		    wr->num_sge > srq->rq.max_sges) {
			status = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		tag = ocrdma_srq_get_idx(srq);
		rqe = ocrdma_hwq_head(&srq->rq);
		ocrdma_build_rqe(rqe, wr, tag);

		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
		/* make sure rqe is written before adapter can perform DMA */
		wmb();
		/* inform hw to start processing it */
		ocrdma_ring_srq_db(srq);
		/* update pointer, counter for next wr */
		ocrdma_hwq_inc_head(&srq->rq);
		wr = wr->next;
	}
	spin_unlock_irqrestore(&srq->q_lock, flags);
	return status;
}
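
/*
 * Illustrative sketch only: SRQ consumers replenish receive buffers with
 * ib_post_srq_recv(), which lands in ocrdma_post_srq_recv() for this
 * device.  example_replenish_srq and its sge argument are hypothetical.
 */
static int example_replenish_srq(struct ib_srq *srq, struct ib_sge *sge)
{
	struct ib_recv_wr wr = { .wr_id = sge->addr, .sg_list = sge,
				 .num_sge = 1 };
	struct ib_recv_wr *bad_wr;

	return ib_post_srq_recv(srq, &wr, &bad_wr);
}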
static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
{
	enum ib_wc_status ibwc_status;

	switch (status) {
	case OCRDMA_CQE_GENERAL_ERR:
		ibwc_status = IB_WC_GENERAL_ERR;
		break;
	case OCRDMA_CQE_LOC_LEN_ERR:
		ibwc_status = IB_WC_LOC_LEN_ERR;
		break;
	case OCRDMA_CQE_LOC_QP_OP_ERR:
		ibwc_status = IB_WC_LOC_QP_OP_ERR;
		break;
	case OCRDMA_CQE_LOC_EEC_OP_ERR:
		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
		break;
	case OCRDMA_CQE_LOC_PROT_ERR:
		ibwc_status = IB_WC_LOC_PROT_ERR;
		break;
	case OCRDMA_CQE_WR_FLUSH_ERR:
		ibwc_status = IB_WC_WR_FLUSH_ERR;
		break;
	case OCRDMA_CQE_MW_BIND_ERR:
		ibwc_status = IB_WC_MW_BIND_ERR;
		break;
	case OCRDMA_CQE_BAD_RESP_ERR:
		ibwc_status = IB_WC_BAD_RESP_ERR;
		break;
	case OCRDMA_CQE_LOC_ACCESS_ERR:
		ibwc_status = IB_WC_LOC_ACCESS_ERR;
		break;
	case OCRDMA_CQE_REM_INV_REQ_ERR:
		ibwc_status = IB_WC_REM_INV_REQ_ERR;
		break;
	case OCRDMA_CQE_REM_ACCESS_ERR:
		ibwc_status = IB_WC_REM_ACCESS_ERR;
		break;
	case OCRDMA_CQE_REM_OP_ERR:
		ibwc_status = IB_WC_REM_OP_ERR;
		break;
	case OCRDMA_CQE_RETRY_EXC_ERR:
		ibwc_status = IB_WC_RETRY_EXC_ERR;
		break;
	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
		break;
	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
		break;
	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
		break;
	case OCRDMA_CQE_REM_ABORT_ERR:
		ibwc_status = IB_WC_REM_ABORT_ERR;
		break;
	case OCRDMA_CQE_INV_EECN_ERR:
		ibwc_status = IB_WC_INV_EECN_ERR;
		break;
	case OCRDMA_CQE_INV_EEC_STATE_ERR:
		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
		break;
	case OCRDMA_CQE_FATAL_ERR:
		ibwc_status = IB_WC_FATAL_ERR;
		break;
	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
		break;
	default:
		ibwc_status = IB_WC_GENERAL_ERR;
		break;
	}
	return ibwc_status;
}
static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
			     u32 wqe_idx)
{
	struct ocrdma_hdr_wqe *hdr;
	struct ocrdma_sge *rw;
	int opcode;

	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);

	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
	/* Undo the hdr->cw swap */
	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
	switch (opcode) {
	case OCRDMA_WRITE:
		ibwc->opcode = IB_WC_RDMA_WRITE;
		break;
	case OCRDMA_READ:
		rw = (struct ocrdma_sge *)(hdr + 1);
		ibwc->opcode = IB_WC_RDMA_READ;
		ibwc->byte_len = rw->len;
		break;
	case OCRDMA_SEND:
		ibwc->opcode = IB_WC_SEND;
		break;
	case OCRDMA_FR_MR:
		ibwc->opcode = IB_WC_FAST_REG_MR;
		break;
	case OCRDMA_LKEY_INV:
		ibwc->opcode = IB_WC_LOCAL_INV;
		break;
	default:
		ibwc->status = IB_WC_GENERAL_ERR;
		pr_err("%s() invalid opcode received = 0x%x\n",
		       __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
		break;
	}
}
static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
					  struct ocrdma_cqe *cqe)
{
	if (is_cqe_for_sq(cqe)) {
		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
				cqe->flags_status_srcqpn) &
				~OCRDMA_CQE_STATUS_MASK);
		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
				cqe->flags_status_srcqpn) |
				(OCRDMA_CQE_WR_FLUSH_ERR <<
					OCRDMA_CQE_STATUS_SHIFT));
	} else {
		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
					cqe->flags_status_srcqpn) &
					~OCRDMA_CQE_UD_STATUS_MASK);
			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
					cqe->flags_status_srcqpn) |
					(OCRDMA_CQE_WR_FLUSH_ERR <<
						OCRDMA_CQE_UD_STATUS_SHIFT));
		} else {
			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
					cqe->flags_status_srcqpn) &
					~OCRDMA_CQE_STATUS_MASK);
			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
					cqe->flags_status_srcqpn) |
					(OCRDMA_CQE_WR_FLUSH_ERR <<
						OCRDMA_CQE_STATUS_SHIFT));
		}
	}
}
static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
				  struct ocrdma_qp *qp, int status)
{
	bool expand = false;

	ibwc->byte_len = 0;
	ibwc->qp = &qp->ibqp;
	ibwc->status = ocrdma_to_ibwc_err(status);

	ocrdma_flush_qp(qp);
	ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);

	/* if wqe/rqe pending for which cqe needs to be returned,
	 * trigger inflating it.
	 */
	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
		expand = true;
		ocrdma_set_cqe_status_flushed(qp, cqe);
	}
	return expand;
}
static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
				  struct ocrdma_qp *qp, int status)
{
	ibwc->opcode = IB_WC_RECV;
	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
	ocrdma_hwq_inc_tail(&qp->rq);

	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
}
static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
				  struct ocrdma_qp *qp, int status)
{
	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
	ocrdma_hwq_inc_tail(&qp->sq);

	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
}
static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
				 bool *polled, bool *stop)
{
	bool expand;
	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;

	/* when hw sq is empty, but rq is not empty, so we continue
	 * to keep the cqe in order to get the cq event again.
	 */
	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* when cq for rq and sq is same, it is safe to return
		 * flush cqe for RQEs.
		 */
		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
			*polled = true;
			status = OCRDMA_CQE_WR_FLUSH_ERR;
			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
		} else {
			/* stop processing further cqe as this cqe is used for
			 * triggering cq event on buddy cq of RQ.
			 * When QP is destroyed, this cqe will be removed
			 * from the cq's hardware q.
			 */
			*polled = false;
			*stop = true;
			expand = false;
		}
	} else {
		*polled = true;
		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
	}
	return expand;
}
static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
				     struct ocrdma_cqe *cqe,
				     struct ib_wc *ibwc, bool *polled)
{
	bool expand = false;
	int tail = qp->sq.tail;
	u32 wqe_idx;

	if (!qp->wqe_wr_id_tbl[tail].signaled) {
		*polled = false;    /* WC cannot be consumed yet */
	} else {
		ibwc->status = IB_WC_SUCCESS;
		ibwc->wc_flags = 0;
		ibwc->qp = &qp->ibqp;
		ocrdma_update_wc(qp, ibwc, tail);
		*polled = true;
	}
	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
	if (tail != wqe_idx)
		expand = true; /* Coalesced CQE can't be consumed yet */

	ocrdma_hwq_inc_tail(&qp->sq);
	return expand;
}
static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
			     struct ib_wc *ibwc, bool *polled, bool *stop)
{
	int status;
	bool expand;

	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;

	if (status == OCRDMA_CQE_SUCCESS)
		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
	else
		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
	return expand;
}
static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
{
	int status;

	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
						OCRDMA_CQE_SRCQP_MASK;
	ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
						OCRDMA_CQE_PKEY_MASK;
	ibwc->wc_flags = IB_WC_GRH;
	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
	return status;
}
static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
				       struct ocrdma_cqe *cqe,
				       struct ocrdma_qp *qp)
{
	unsigned long flags;
	struct ocrdma_srq *srq;
	u32 wqe_idx;

	srq = get_ocrdma_srq(qp->ibqp.srq);
	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
			OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
	spin_lock_irqsave(&srq->q_lock, flags);
	ocrdma_srq_toggle_bit(srq, wqe_idx);
	spin_unlock_irqrestore(&srq->q_lock, flags);
	ocrdma_hwq_inc_tail(&srq->rq);
}
static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
				 struct ib_wc *ibwc, bool *polled, bool *stop,
				 int status)
{
	bool expand;

	/* when hw_rq is empty, but wq is not empty, so continue
	 * to keep the cqe to get the cq event again.
	 */
	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
			*polled = true;
			status = OCRDMA_CQE_WR_FLUSH_ERR;
			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
		} else {
			*polled = false;
			*stop = true;
			expand = false;
		}
	} else {
		*polled = true;
		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
	}
	return expand;
}
static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
{
	ibwc->opcode = IB_WC_RECV;
	ibwc->qp = &qp->ibqp;
	ibwc->status = IB_WC_SUCCESS;

	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
		ocrdma_update_ud_rcqe(ibwc, cqe);
	else
		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);

	if (is_cqe_imm(cqe)) {
		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
		ibwc->wc_flags |= IB_WC_WITH_IMM;
	} else if (is_cqe_wr_imm(cqe)) {
		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
		ibwc->wc_flags |= IB_WC_WITH_IMM;
	} else if (is_cqe_invalidated(cqe)) {
		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
	}
	if (qp->ibqp.srq) {
		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
	} else {
		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
		ocrdma_hwq_inc_tail(&qp->rq);
	}
}
static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
			     struct ib_wc *ibwc, bool *polled, bool *stop)
{
	int status;
	bool expand = false;

	ibwc->wc_flags = 0;
	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
			OCRDMA_CQE_UD_STATUS_MASK) >>
				OCRDMA_CQE_UD_STATUS_SHIFT;
	} else {
		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
			OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
	}

	if (status == OCRDMA_CQE_SUCCESS) {
		*polled = true;
		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
	} else {
		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
					      status);
	}
	return expand;
}
static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
				   u16 cur_getp)
{
	if (cq->phase_change) {
		if (cur_getp == 0)
			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
	} else {
		/* clear valid bit */
		cqe->flags_status_srcqpn = 0;
	}
}
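
/*
 * Illustrative sketch (an assumption drawn from the code above, not part
 * of the driver): the valid-bit/phase protocol.  The expected phase flips
 * each time the poll index wraps to zero, so a CQE counts as new only
 * while its valid bit matches the CQ's current phase; entries left over
 * from the previous lap fail the comparison without being zeroed.
 */
static inline bool example_cqe_phase_matches(u32 cqe_flags, u32 cur_phase)
{
	return (cqe_flags & OCRDMA_CQE_VALID) == cur_phase;
}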
static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
			    struct ib_wc *ibwc)
{
	u16 qpn = 0;
	int i = 0;
	bool expand = false;
	int polled_hw_cqes = 0;
	struct ocrdma_qp *qp = NULL;
	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
	struct ocrdma_cqe *cqe;
	u16 cur_getp; bool polled = false; bool stop = false;

	cur_getp = cq->getp;
	while (num_entries) {
		cqe = cq->va + cur_getp;
		/* check whether valid cqe or not */
		if (!is_cqe_valid(cq, cqe))
			break;
		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
		/* ignore discarded cqe */
		if (qpn == 0)
			goto skip_cqe;
		qp = dev->qp_tbl[qpn];
		BUG_ON(qp == NULL);

		if (is_cqe_for_sq(cqe)) {
			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
						  &stop);
		} else {
			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
						  &stop);
		}
		if (expand)
			goto expand_cqe;
		if (stop)
			goto stop_cqe;
		/* clear qpn to avoid duplicate processing by discard_cqe() */
		cqe->cmn.qpn = 0;
skip_cqe:
		polled_hw_cqes += 1;
		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
		ocrdma_change_cq_phase(cq, cqe, cur_getp);
expand_cqe:
		if (polled) {
			num_entries -= 1;
			i += 1;
			ibwc = ibwc + 1;
			polled = false;
		}
	}
stop_cqe:
	cq->getp = cur_getp;
	if (polled_hw_cqes || expand || stop) {
		ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
				  polled_hw_cqes);
	}
	return i;
}
/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
{
	int err_cqes = 0;

	while (num_entries) {
		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
			break;
		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
			ocrdma_hwq_inc_tail(&qp->sq);
		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
			ocrdma_hwq_inc_tail(&qp->rq);
		} else {
			return err_cqes;
		}
		ibwc->byte_len = 0;
		ibwc->status = IB_WC_WR_FLUSH_ERR;
		ibwc = ibwc + 1;
		err_cqes += 1;
		num_entries -= 1;
	}
	return err_cqes;
}
int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int cqes_to_poll = num_entries;
	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
	int num_os_cqe = 0, err_cqes = 0;
	struct ocrdma_qp *qp;
	unsigned long flags;

	/* poll cqes from adapter CQ */
	spin_lock_irqsave(&cq->cq_lock, flags);
	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
	spin_unlock_irqrestore(&cq->cq_lock, flags);
	cqes_to_poll -= num_os_cqe;

	if (cqes_to_poll) {
		wc = wc + num_os_cqe;
		/* adapter returns single error cqe when qp moves to
		 * error state. So insert error cqes with wc_status as
		 * FLUSHED for pending WQEs and RQEs of QP's SQ and RQ
		 * respectively which uses this CQ.
		 */
		spin_lock_irqsave(&dev->flush_q_lock, flags);
		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
			if (cqes_to_poll == 0)
				break;
			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
			cqes_to_poll -= err_cqes;
			num_os_cqe += err_cqes;
			wc = wc + err_cqes;
		}
		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
	}
	return num_os_cqe;
}
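
/*
 * Illustrative sketch only: consumers drain completions with ib_poll_cq(),
 * which calls ocrdma_poll_cq() for this device.  example_drain_cq and
 * EXAMPLE_WC_BATCH are hypothetical names.
 */
#define EXAMPLE_WC_BATCH 16
static void example_drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc[EXAMPLE_WC_BATCH];
	int i, n;

	while ((n = ib_poll_cq(cq, EXAMPLE_WC_BATCH, wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IB_WC_SUCCESS)
				pr_err("wr_id 0x%llx completed with status %d\n",
				       (unsigned long long)wc[i].wr_id,
				       wc[i].status);
			/* ... hand wc[i] to the consumer ... */
		}
	}
}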
int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
{
	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
	u16 cq_id;
	u16 cur_getp;
	struct ocrdma_cqe *cqe;
	unsigned long flags;

	cq_id = cq->id;

	spin_lock_irqsave(&cq->cq_lock, flags);
	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
		cq->armed = true;
	if (cq_flags & IB_CQ_SOLICITED)
		cq->solicited = true;

	cur_getp = cq->getp;
	cqe = cq->va + cur_getp;

	/* check whether any valid cqe exist or not, if not then safe to
	 * arm. If cqe is not yet consumed, then let it get consumed and then
	 * we arm it to avoid false interrupts.
	 */
	if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
		cq->arm_needed = false;
		ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);
	return 0;
}
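
/*
 * Illustrative sketch only: re-arming goes through ib_req_notify_cq(),
 * which calls ocrdma_arm_cq().  A typical completion handler polls until
 * empty, re-arms, then polls once more to close the race with CQEs that
 * arrived before the arm took effect.  example_cq_event_handler is a
 * hypothetical name.
 */
static void example_cq_event_handler(struct ib_cq *cq, void *cq_context)
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		/* ... hand wc to the consumer ... */
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	/* catch any CQE that slipped in before the CQ was re-armed */
	while (ib_poll_cq(cq, 1, &wc) > 0) {
		/* ... hand wc to the consumer ... */
	}
}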
struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
{
	int status;
	struct ocrdma_mr *mr;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);

	if (max_page_list_len > dev->attr.max_pages_per_frmr)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	status = ocrdma_get_pbl_info(dev, mr, max_page_list_len);
	if (status)
		goto pbl_err;
	mr->hwmr.fr_mr = 1;
	mr->hwmr.remote_rd = 0;
	mr->hwmr.remote_wr = 0;
	mr->hwmr.local_rd = 0;
	mr->hwmr.local_wr = 0;
	mr->hwmr.mw_bind = 0;
	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
	if (status)
		goto pbl_err;
	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
	if (status)
		goto mbx_err;
	mr->ibmr.rkey = mr->hwmr.lkey;
	mr->ibmr.lkey = mr->hwmr.lkey;
	dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr;
	return &mr->ibmr;
mbx_err:
	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
pbl_err:
	kfree(mr);
	return ERR_PTR(-ENOMEM);
}
struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
							  *ibdev,
							  int page_list_len)
{
	struct ib_fast_reg_page_list *frmr_list;
	int size;

	size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
	frmr_list = kzalloc(size, GFP_KERNEL);
	if (!frmr_list)
		return ERR_PTR(-ENOMEM);
	frmr_list->page_list = (u64 *)(frmr_list + 1);
	frmr_list->max_page_list_len = page_list_len;
	return frmr_list;
}

void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
{
	kfree(page_list);
}
#define MAX_KERNEL_PBE_SIZE 65536
static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
				    int buf_cnt, u32 *pbe_size)
{
	u64 total_size = 0;
	u64 buf_size = 0;
	int i;
	*pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
	*pbe_size = roundup_pow_of_two(*pbe_size);

	/* find the smallest PBE size that we can have */
	for (i = 0; i < buf_cnt; i++) {
		/* first addr may not be page aligned, so ignore checking */
		if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
				 (buf_list[i].size & ~PAGE_MASK))) {
			return 0;
		}

		/* if configured PBE size is greater then the chosen one,
		 * reduce the PBE size.
		 */
		buf_size = roundup(buf_list[i].size, PAGE_SIZE);
		/* pbe_size has to be even multiple of 4K 1,2,4,8...*/
		buf_size = roundup_pow_of_two(buf_size);
		if (*pbe_size > buf_size)
			*pbe_size = buf_size;

		total_size += buf_size;
	}
	*pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
	    (MAX_KERNEL_PBE_SIZE) : (*pbe_size);

	/* num_pbes = total_size / (*pbe_size);  this is implemented below. */
	return total_size >> ilog2(*pbe_size);
}
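
/*
 * Worked example (illustrative, hypothetical buffer sizes): for two
 * physical buffers of 16K and 8K, the loop above settles on
 * *pbe_size = 8K (the smallest per-buffer size after rounding up to a
 * power of two), total_size = 24K, and the function returns
 * 24K >> ilog2(8K) = 3 PBEs.
 */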
static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
			      u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
			      struct ocrdma_hw_mr *hwmr)
{
	int i;
	int idx;
	int pbes_per_buf = 0;
	u64 buf_addr = 0;
	int num_pbes;
	struct ocrdma_pbe *pbe;
	int total_num_pbes = 0;

	if (!hwmr->num_pbes)
		return;

	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
	num_pbes = 0;

	/* go through the OS phy regions & fill hw pbe entries into pbls. */
	for (i = 0; i < ib_buf_cnt; i++) {
		buf_addr = buf_list[i].addr;
		pbes_per_buf =
		    roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
		    pbe_size;
		hwmr->len += buf_list[i].size;
		/* number of pbes can be more for one OS buf, when
		 * buffers are of different sizes.
		 * split the ib_buf to one or more pbes.
		 */
		for (idx = 0; idx < pbes_per_buf; idx++) {
			/* we program always page aligned addresses,
			 * first unaligned address is taken care by fbo.
			 */
			if (i == 0) {
				/* for non zero fbo, assign the
				 * start of the page.
				 */
				pbe->pa_lo =
				    cpu_to_le32((u32) (buf_addr & PAGE_MASK));
				pbe->pa_hi =
				    cpu_to_le32((u32) upper_32_bits(buf_addr));
			} else {
				pbe->pa_lo =
				    cpu_to_le32((u32) (buf_addr & 0xffffffff));
				pbe->pa_hi =
				    cpu_to_le32((u32) upper_32_bits(buf_addr));
			}
			buf_addr += pbe_size;
			num_pbes += 1;
			total_num_pbes += 1;
			pbe++;

			if (total_num_pbes == hwmr->num_pbes)
				goto mr_tbl_done;
			/* if the pbl is full storing the pbes,
			 * move to next pbl.
			 */
			if (num_pbes == (hwmr->pbl_size / sizeof(u64))) {
				pbl_tbl++;
				pbe = (struct ocrdma_pbe *)pbl_tbl->va;
				num_pbes = 0;
			}
		}
	}
mr_tbl_done:
	return;
}
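
/*
 * Illustrative note (hypothetical sizes): with pbe_size = 4K, a single
 * 16K physical buffer is split into four PBEs whose addresses advance by
 * pbe_size; whenever a PBL fills up (pbl_size / sizeof(u64) entries), the
 * walk above simply continues in the next PBL of hwmr->pbl_table.
 */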
struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
				   struct ib_phys_buf *buf_list,
				   int buf_cnt, int acc, u64 *iova_start)
{
	int status = -ENOMEM;
	struct ocrdma_mr *mr;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
	u32 num_pbes;
	u32 pbe_size = 0;

	if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(status);

	num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
	if (num_pbes == 0) {
		status = -EINVAL;
		goto pbl_err;
	}
	status = ocrdma_get_pbl_info(dev, mr, num_pbes);
	if (status)
		goto pbl_err;

	mr->hwmr.pbe_size = pbe_size;
	mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
	mr->hwmr.va = *iova_start;
	mr->hwmr.local_rd = 1;
	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;

	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
	if (status)
		goto pbl_err;
	build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
			  &mr->hwmr);
	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
	if (status)
		goto mbx_err;

	mr->ibmr.lkey = mr->hwmr.lkey;
	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
		mr->ibmr.rkey = mr->hwmr.lkey;

	return &mr->ibmr;

mbx_err:
	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
pbl_err:
	kfree(mr);
	return ERR_PTR(status);
}