/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>

#include "pvrdma.h"

static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
				struct pvrdma_qp *qp);

static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
			   struct pvrdma_cq **recv_cq)
{
	*send_cq = to_vcq(qp->ibqp.send_cq);
	*recv_cq = to_vcq(qp->ibqp.recv_cq);
}

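/*
 * Lock/unlock both CQs backing a QP. When the send and receive CQs differ,
 * the locks are always taken in cq_handle order so concurrent callers cannot
 * deadlock; the __acquire()/__release() annotations keep sparse's lock
 * tracking balanced when both CQs are the same object.
 */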
static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
			    unsigned long *scq_flags,
			    unsigned long *rcq_flags)
	__acquires(scq->cq_lock) __acquires(rcq->cq_lock)
{
	if (scq == rcq) {
		spin_lock_irqsave(&scq->cq_lock, *scq_flags);
		__acquire(rcq->cq_lock);
	} else if (scq->cq_handle < rcq->cq_handle) {
		spin_lock_irqsave(&scq->cq_lock, *scq_flags);
		spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
					 SINGLE_DEPTH_NESTING);
	} else {
		spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
		spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
					 SINGLE_DEPTH_NESTING);
	}
}

static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
			      unsigned long *scq_flags,
			      unsigned long *rcq_flags)
	__releases(scq->cq_lock) __releases(rcq->cq_lock)
{
	if (scq == rcq) {
		__release(rcq->cq_lock);
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
	} else if (scq->cq_handle < rcq->cq_handle) {
		spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
	} else {
		spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
		spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
	}
}

static void pvrdma_reset_qp(struct pvrdma_qp *qp)
{
	struct pvrdma_cq *scq, *rcq;
	unsigned long scq_flags, rcq_flags;

	/* Clean up cqes */
	get_cqs(qp, &scq, &rcq);
	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	_pvrdma_flush_cqe(qp, scq);
	if (scq != rcq)
		_pvrdma_flush_cqe(qp, rcq);

	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	/*
	 * Reset queuepair. The checks are because usermode queuepairs won't
	 * have kernel ringstates.
	 */
	if (qp->rq.ring) {
		atomic_set(&qp->rq.ring->cons_head, 0);
		atomic_set(&qp->rq.ring->prod_tail, 0);
	}
	if (qp->sq.ring) {
		atomic_set(&qp->sq.ring->cons_head, 0);
		atomic_set(&qp->sq.ring->prod_tail, 0);
	}
}

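/*
 * Queue sizing: both the WQE count and the per-WQE SGE count are rounded up
 * to powers of two, and the results are written back into the caller's
 * ib_qp_cap so the consumer sees the actual queue dimensions.
 */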
static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
			      struct ib_qp_cap *req_cap,
			      struct pvrdma_qp *qp)
{
	if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
	    req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
		dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
		return -EINVAL;
	}

	qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
	qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));

	/* Write back */
	req_cap->max_recv_wr = qp->rq.wqe_cnt;
	req_cap->max_recv_sge = qp->rq.max_sg;

	qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
					     sizeof(struct pvrdma_sge) *
					     qp->rq.max_sg);
	qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
			  PAGE_SIZE;

	return 0;
}

static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
			      struct pvrdma_qp *qp)
{
	if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
	    req_cap->max_send_sge > dev->dsr->caps.max_sge) {
		dev_warn(&dev->pdev->dev, "send queue size invalid\n");
		return -EINVAL;
	}

	qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
	qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));

	/* Write back */
	req_cap->max_send_wr = qp->sq.wqe_cnt;
	req_cap->max_send_sge = qp->sq.max_sg;

	qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
					     sizeof(struct pvrdma_sge) *
					     qp->sq.max_sg);
	/* Note: one extra page for the header. */
	qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES +
			  (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) /
			  PAGE_SIZE;

	return 0;
}

/**
 * pvrdma_create_qp - create queue pair
 * @pd: protection domain
 * @init_attr: queue pair attributes
 * @udata: user data
 *
 * @return: the ib_qp pointer on success, otherwise returns an errno.
 */
struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
			       struct ib_qp_init_attr *init_attr,
			       struct ib_udata *udata)
{
	struct pvrdma_qp *qp = NULL;
	struct pvrdma_dev *dev = to_vdev(pd->device);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
	struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
	struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2;
	struct pvrdma_create_qp ucmd;
	struct pvrdma_create_qp_resp qp_resp = {};
	unsigned long flags;
	int ret;
	bool is_srq = !!init_attr->srq;

	if (init_attr->create_flags) {
		dev_warn(&dev->pdev->dev,
			 "invalid create queuepair flags %#x\n",
			 init_attr->create_flags);
		return ERR_PTR(-EOPNOTSUPP);
	}

	if (init_attr->qp_type != IB_QPT_RC &&
	    init_attr->qp_type != IB_QPT_UD &&
	    init_attr->qp_type != IB_QPT_GSI) {
		dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
			 init_attr->qp_type);
		return ERR_PTR(-EOPNOTSUPP);
	}

	if (is_srq && !dev->dsr->caps.max_srq) {
		dev_warn(&dev->pdev->dev,
			 "SRQs not supported by device\n");
		return ERR_PTR(-EINVAL);
	}

	if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
		return ERR_PTR(-ENOMEM);

	switch (init_attr->qp_type) {
	case IB_QPT_GSI:
		if (init_attr->port_num == 0 ||
		    init_attr->port_num > pd->device->phys_port_cnt) {
			dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
			ret = -EINVAL;
			goto err_qp;
		}
		fallthrough;
	case IB_QPT_RC:
	case IB_QPT_UD:
		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
		if (!qp) {
			ret = -ENOMEM;
			goto err_qp;
		}

		spin_lock_init(&qp->sq.lock);
		spin_lock_init(&qp->rq.lock);
		mutex_init(&qp->mutex);
		refcount_set(&qp->refcnt, 1);
		init_completion(&qp->free);

		qp->state = IB_QPS_RESET;
		qp->is_kernel = !udata;

		if (!qp->is_kernel) {
			dev_dbg(&dev->pdev->dev,
				"create queuepair from user space\n");

			if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
				ret = -EFAULT;
				goto err_qp;
			}

			/* Userspace supports qpn and qp handles? */
			if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION &&
			    udata->outlen < sizeof(qp_resp)) {
				dev_warn(&dev->pdev->dev,
					 "create queuepair not supported\n");
				ret = -EOPNOTSUPP;
				goto err_qp;
			}

			if (!is_srq) {
				/* set qp->sq.wqe_cnt, shift, buf_size.. */
				qp->rumem = ib_umem_get(pd->device,
							ucmd.rbuf_addr,
							ucmd.rbuf_size, 0);
				if (IS_ERR(qp->rumem)) {
					ret = PTR_ERR(qp->rumem);
					goto err_qp;
				}
				qp->srq = NULL;
			} else {
				qp->rumem = NULL;
				qp->srq = to_vsrq(init_attr->srq);
			}

			qp->sumem = ib_umem_get(pd->device, ucmd.sbuf_addr,
						ucmd.sbuf_size, 0);
			if (IS_ERR(qp->sumem)) {
				if (!is_srq)
					ib_umem_release(qp->rumem);
				ret = PTR_ERR(qp->sumem);
				goto err_qp;
			}

			qp->npages_send =
				ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE);
			if (!is_srq)
				qp->npages_recv = ib_umem_num_dma_blocks(
					qp->rumem, PAGE_SIZE);
			else
				qp->npages_recv = 0;
			qp->npages = qp->npages_send + qp->npages_recv;
		} else {
			ret = pvrdma_set_sq_size(to_vdev(pd->device),
						 &init_attr->cap, qp);
			if (ret)
				goto err_qp;

			ret = pvrdma_set_rq_size(to_vdev(pd->device),
						 &init_attr->cap, qp);
			if (ret)
				goto err_qp;

			qp->npages = qp->npages_send + qp->npages_recv;

			/* Skip header page. */
			qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE;

			/* Recv queue pages are after send pages. */
			qp->rq.offset = qp->npages_send * PAGE_SIZE;
		}

		if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
			dev_warn(&dev->pdev->dev,
				 "overflow pages in queuepair\n");
			ret = -EINVAL;
			goto err_umem;
		}

		ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
					   qp->is_kernel);
		if (ret) {
			dev_warn(&dev->pdev->dev,
				 "could not allocate page directory\n");
			goto err_umem;
		}

		if (!qp->is_kernel) {
			pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
			if (!is_srq)
				pvrdma_page_dir_insert_umem(&qp->pdir,
							    qp->rumem,
							    qp->npages_send);
		} else {
			/* Ring state is always the first page. */
			qp->sq.ring = qp->pdir.pages[0];
			qp->rq.ring = is_srq ? NULL : &qp->sq.ring[1];
		}
		break;
	default:
		ret = -EINVAL;
		goto err_qp;
	}

	/* Not supported */
	init_attr->cap.max_inline_data = 0;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
	cmd->pd_handle = to_vpd(pd)->pd_handle;
	cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
	cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
	if (is_srq)
		cmd->srq_handle = to_vsrq(init_attr->srq)->srq_handle;
	else
		cmd->srq_handle = 0;
	cmd->max_send_wr = init_attr->cap.max_send_wr;
	cmd->max_recv_wr = init_attr->cap.max_recv_wr;
	cmd->max_send_sge = init_attr->cap.max_send_sge;
	cmd->max_recv_sge = init_attr->cap.max_recv_sge;
	cmd->max_inline_data = init_attr->cap.max_inline_data;
	cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
	cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
	cmd->is_srq = is_srq;
	cmd->lkey = 0;
	cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
	cmd->total_chunks = qp->npages;
	cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES;
	cmd->pdir_dma = qp->pdir.dir_dma;

	dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
		cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
		cmd->max_recv_sge);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create queuepair, error: %d\n", ret);
		goto err_pdir;
	}

	/* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
	qp->port = init_attr->port_num;

	if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) {
		qp->ibqp.qp_num = resp_v2->qpn;
		qp->qp_handle = resp_v2->qp_handle;
	} else {
		qp->ibqp.qp_num = resp->qpn;
		qp->qp_handle = resp->qpn;
	}

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (udata) {
		qp_resp.qpn = qp->ibqp.qp_num;
		qp_resp.qp_handle = qp->qp_handle;

		if (ib_copy_to_udata(udata, &qp_resp,
				     min(udata->outlen, sizeof(qp_resp)))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back udata\n");
			__pvrdma_destroy_qp(dev, qp);
			return ERR_PTR(-EINVAL);
		}
	}

	return &qp->ibqp;

err_pdir:
	pvrdma_page_dir_cleanup(dev, &qp->pdir);
err_umem:
	ib_umem_release(qp->rumem);
	ib_umem_release(qp->sumem);
err_qp:
	kfree(qp);
	atomic_dec(&dev->num_qps);

	return ERR_PTR(ret);
}

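/*
 * Release the software state of a QP. The final reference completes qp->free,
 * and waiting on that completion ensures that any other path still holding a
 * reference to this QP has finished with it before its memory is freed.
 */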
static void _pvrdma_free_qp(struct pvrdma_qp *qp)
{
	unsigned long flags;
	struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);

	spin_lock_irqsave(&dev->qp_tbl_lock, flags);
	dev->qp_tbl[qp->qp_handle] = NULL;
	spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);

	if (refcount_dec_and_test(&qp->refcnt))
		complete(&qp->free);
	wait_for_completion(&qp->free);

	ib_umem_release(qp->rumem);
	ib_umem_release(qp->sumem);

	pvrdma_page_dir_cleanup(dev, &qp->pdir);

	kfree(qp);

	atomic_dec(&dev->num_qps);
}

static void pvrdma_free_qp(struct pvrdma_qp *qp)
{
	struct pvrdma_cq *scq;
	struct pvrdma_cq *rcq;
	unsigned long scq_flags, rcq_flags;

	/* In case cq is polling */
	get_cqs(qp, &scq, &rcq);
	pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	_pvrdma_flush_cqe(qp, scq);
	if (scq != rcq)
		_pvrdma_flush_cqe(qp, rcq);

	/*
	 * We're now unlocking the CQs before clearing out the qp handle;
	 * this should still be safe. We have destroyed the backend QP and
	 * flushed the CQEs so there should be no other completions for
	 * this QP.
	 */
	pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);

	_pvrdma_free_qp(qp);
}

static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev,
					   u32 qp_handle)
{
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
	cmd->qp_handle = qp_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&dev->pdev->dev,
			 "destroy queuepair failed, error: %d\n", ret);
}

/**
 * pvrdma_destroy_qp - destroy a queue pair
 * @qp: the queue pair to destroy
 * @udata: user data or null for kernel object
 *
 * @return: always 0.
 */
int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
	struct pvrdma_qp *vqp = to_vqp(qp);

	_pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle);
	pvrdma_free_qp(vqp);

	return 0;
}

static void __pvrdma_destroy_qp(struct pvrdma_dev *dev,
				struct pvrdma_qp *qp)
{
	_pvrdma_destroy_qp_work(dev, qp->qp_handle);
	_pvrdma_free_qp(qp);
}

/**
 * pvrdma_modify_qp - modify queue pair attributes
 * @ibqp: the queue pair
 * @attr: the new queue pair's attributes
 * @attr_mask: attributes mask
 * @udata: user data
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		     int attr_mask, struct ib_udata *udata)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	struct pvrdma_qp *qp = to_vqp(ibqp);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
	enum ib_qp_state cur_state, next_state;
	int ret;

	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
		return -EOPNOTSUPP;

	/* Sanity checking. Should need lock here */
	mutex_lock(&qp->mutex);
	cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
		qp->state;
	next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;

	if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
				attr_mask)) {
		ret = -EINVAL;
		goto out;
	}

	if (attr_mask & IB_QP_PORT) {
		if (attr->port_num == 0 ||
		    attr->port_num > ibqp->device->phys_port_cnt) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		if (attr->min_rnr_timer > 31) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
			ret = -EINVAL;
			goto out;
		}
	}

	if (attr_mask & IB_QP_QKEY)
		qp->qkey = attr->qkey;

	if (cur_state == next_state && cur_state == IB_QPS_RESET) {
		ret = 0;
		goto out;
	}

	qp->state = next_state;
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
	cmd->qp_handle = qp->qp_handle;
	cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
	cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
	cmd->attrs.cur_qp_state =
		ib_qp_state_to_pvrdma(attr->cur_qp_state);
	cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
	cmd->attrs.path_mig_state =
		ib_mig_state_to_pvrdma(attr->path_mig_state);
	cmd->attrs.qkey = attr->qkey;
	cmd->attrs.rq_psn = attr->rq_psn;
	cmd->attrs.sq_psn = attr->sq_psn;
	cmd->attrs.dest_qp_num = attr->dest_qp_num;
	cmd->attrs.qp_access_flags =
		ib_access_flags_to_pvrdma(attr->qp_access_flags);
	cmd->attrs.pkey_index = attr->pkey_index;
	cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
	cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
	cmd->attrs.sq_draining = attr->sq_draining;
	cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
	cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
	cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
	cmd->attrs.port_num = attr->port_num;
	cmd->attrs.timeout = attr->timeout;
	cmd->attrs.retry_cnt = attr->retry_cnt;
	cmd->attrs.rnr_retry = attr->rnr_retry;
	cmd->attrs.alt_port_num = attr->alt_port_num;
	cmd->attrs.alt_timeout = attr->alt_timeout;
	ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
	rdma_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
	rdma_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not modify queuepair, error: %d\n", ret);
	} else if (rsp.hdr.err > 0) {
		dev_warn(&dev->pdev->dev,
			 "cannot modify queuepair, error: %d\n", rsp.hdr.err);
		ret = -EINVAL;
	}

	if (ret == 0 && next_state == IB_QPS_RESET)
		pvrdma_reset_qp(qp);

out:
	mutex_unlock(&qp->mutex);

	return ret;
}

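/*
 * WQEs live in the QP's page directory at a fixed per-queue offset;
 * slot n of a queue starts at offset + n * wqe_size.
 */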
static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
	return pvrdma_page_dir_get_ptr(&qp->pdir,
				       qp->sq.offset + n * qp->sq.wqe_size);
}

static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
{
	return pvrdma_page_dir_get_ptr(&qp->pdir,
				       qp->rq.offset + n * qp->rq.wqe_size);
}

static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
		       const struct ib_reg_wr *wr)
{
	struct pvrdma_user_mr *mr = to_vmr(wr->mr);

	wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
	wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
	wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
	wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
	wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
	wqe_hdr->wr.fast_reg.access_flags = wr->access;
	wqe_hdr->wr.fast_reg.rkey = wr->key;

	return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
						mr->npages);
}

/**
 * pvrdma_post_send - post send work request entries on a QP
 * @ibqp: the QP
 * @wr: work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
		     const struct ib_send_wr **bad_wr)
{
	struct pvrdma_qp *qp = to_vqp(ibqp);
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	unsigned long flags;
	struct pvrdma_sq_wqe_hdr *wqe_hdr;
	struct pvrdma_sge *sge;
	int i, ret;

	/*
	 * In states lower than RTS, we can fail immediately. In other states,
	 * just post and let the device figure it out.
	 */
	if (qp->state < IB_QPS_RTS) {
		*bad_wr = wr;
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->sq.lock, flags);

	while (wr) {
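		/*
		 * Claim the next free slot in the shared send ring; the
		 * driver produces at prod_tail while the device consumes
		 * from cons_head.
		 */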
		unsigned int tail = 0;

		if (unlikely(!pvrdma_idx_ring_has_space(
				qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "send queue is full\n");
			*bad_wr = wr;
			ret = -ENOMEM;
			goto out;
		}

		if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "send SGE overflow\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		if (unlikely(wr->opcode < 0)) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "invalid send opcode\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Only support UD, RC.
		 * Need to check opcode table for thorough checking.
		 * opcode			_UD	_UC	_RC
		 * _SEND			x	x	x
		 * _SEND_WITH_IMM		x	x	x
		 * _RDMA_WRITE				x	x
		 * _RDMA_WRITE_WITH_IMM			x	x
		 * _LOCAL_INV				x	x
		 * _SEND_WITH_INV			x	x
		 * _RDMA_READ					x
		 * _ATOMIC_CMP_AND_SWP				x
		 * _ATOMIC_FETCH_AND_ADD			x
		 * _MASK_ATOMIC_CMP_AND_SWP			x
		 * _MASK_ATOMIC_FETCH_AND_ADD			x
		 * _REG_MR					x
		 */
		if (qp->ibqp.qp_type != IB_QPT_UD &&
		    qp->ibqp.qp_type != IB_QPT_RC &&
		    wr->opcode != IB_WR_SEND) {
			dev_warn_ratelimited(&dev->pdev->dev,
					     "unsupported queuepair type\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		} else if (qp->ibqp.qp_type == IB_QPT_UD ||
			   qp->ibqp.qp_type == IB_QPT_GSI) {
			if (wr->opcode != IB_WR_SEND &&
			    wr->opcode != IB_WR_SEND_WITH_IMM) {
				dev_warn_ratelimited(&dev->pdev->dev,
						     "invalid send opcode\n");
				*bad_wr = wr;
				ret = -EINVAL;
				goto out;
			}
		}

		wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail);
		memset(wqe_hdr, 0, sizeof(*wqe_hdr));
		wqe_hdr->wr_id = wr->wr_id;
		wqe_hdr->num_sge = wr->num_sge;
		wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
		wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			wqe_hdr->ex.imm_data = wr->ex.imm_data;

		if (unlikely(wqe_hdr->opcode == PVRDMA_WR_ERROR)) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		switch (qp->ibqp.qp_type) {
		case IB_QPT_GSI:
		case IB_QPT_UD:
			if (unlikely(!ud_wr(wr)->ah)) {
				dev_warn_ratelimited(&dev->pdev->dev,
						     "invalid address handle\n");
				*bad_wr = wr;
				ret = -EINVAL;
				goto out;
			}

			/*
			 * Use qkey from qp context if high order bit set,
			 * otherwise from work request.
			 */
			wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
			wqe_hdr->wr.ud.remote_qkey =
				ud_wr(wr)->remote_qkey & 0x80000000 ?
				qp->qkey : ud_wr(wr)->remote_qkey;
			wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;

			break;
		case IB_QPT_RC:
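			/*
			 * RC WQEs carry an opcode-specific segment (RDMA,
			 * atomic, invalidate or fast-register) right after
			 * the common header.
			 */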
			switch (wr->opcode) {
			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				wqe_hdr->wr.rdma.remote_addr =
					rdma_wr(wr)->remote_addr;
				wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
				break;
			case IB_WR_LOCAL_INV:
			case IB_WR_SEND_WITH_INV:
				wqe_hdr->ex.invalidate_rkey =
					wr->ex.invalidate_rkey;
				break;
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				wqe_hdr->wr.atomic.remote_addr =
					atomic_wr(wr)->remote_addr;
				wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
				wqe_hdr->wr.atomic.compare_add =
					atomic_wr(wr)->compare_add;
				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
					wqe_hdr->wr.atomic.swap =
						atomic_wr(wr)->swap;
				break;
			case IB_WR_REG_MR:
				ret = set_reg_seg(wqe_hdr, reg_wr(wr));
				if (ret < 0) {
					dev_warn_ratelimited(&dev->pdev->dev,
							     "Failed to set fast register work request\n");
					*bad_wr = wr;
					goto out;
				}
				break;
			default:
				break;
			}

			break;
		default:
			dev_warn_ratelimited(&dev->pdev->dev,
					     "invalid queuepair type\n");
			*bad_wr = wr;
			ret = -EINVAL;
			goto out;
		}

		sge = (struct pvrdma_sge *)(wqe_hdr + 1);
		for (i = 0; i < wr->num_sge; i++) {
			/* Need to check wqe_size 0 or max size */
			sge->addr = wr->sg_list[i].addr;
			sge->length = wr->sg_list[i].length;
			sge->lkey = wr->sg_list[i].lkey;
			sge++;
		}

		/* Make sure wqe is written before index update */
		smp_wmb();

		/* Update shared sq ring */
		pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
				    qp->sq.wqe_cnt);

		wr = wr->next;
	}

	ret = 0;

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	if (!ret)
		pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);

	return ret;
}

/**
 * pvrdma_post_recv - post receive work request entries on a QP
 * @ibqp: the QP
 * @wr: the work request list to post
 * @bad_wr: the first bad WR returned
 *
 * @return: 0 on success, otherwise errno returned.
 */
int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
		     const struct ib_recv_wr **bad_wr)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	unsigned long flags;
	struct pvrdma_qp *qp = to_vqp(ibqp);
	struct pvrdma_rq_wqe_hdr *wqe_hdr;
	struct pvrdma_sge *sge;
	int ret = 0;
	int i;

	/*
	 * In the RESET state, we can fail immediately. For other states,
	 * just post and let the device figure it out.
	 */
	if (qp->state == IB_QPS_RESET) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->srq) {
		dev_warn(&dev->pdev->dev, "QP associated with SRQ\n");
		*bad_wr = wr;
		return -EINVAL;
	}

	spin_lock_irqsave(&qp->rq.lock, flags);

	while (wr) {
		unsigned int tail = 0;

		if (unlikely(wr->num_sge > qp->rq.max_sg ||
			     wr->num_sge < 0)) {
			ret = -EINVAL;
			*bad_wr = wr;
			dev_warn_ratelimited(&dev->pdev->dev,
					     "recv SGE overflow\n");
			goto out;
		}

		if (unlikely(!pvrdma_idx_ring_has_space(
				qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
			ret = -ENOMEM;
			*bad_wr = wr;
			dev_warn_ratelimited(&dev->pdev->dev,
					     "recv queue full\n");
			goto out;
		}

		wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail);
		wqe_hdr->wr_id = wr->wr_id;
		wqe_hdr->num_sge = wr->num_sge;
		wqe_hdr->total_len = 0;

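		/* The scatter/gather entries follow the WQE header contiguously. */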
		sge = (struct pvrdma_sge *)(wqe_hdr + 1);
		for (i = 0; i < wr->num_sge; i++) {
			sge->addr = wr->sg_list[i].addr;
			sge->length = wr->sg_list[i].length;
			sge->lkey = wr->sg_list[i].lkey;
			sge++;
		}

		/* Make sure wqe is written before index update */
		smp_wmb();

		/* Update shared rq ring */
		pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
				    qp->rq.wqe_cnt);

		wr = wr->next;
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);

	pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);

	return ret;

out:
	spin_unlock_irqrestore(&qp->rq.lock, flags);

	return ret;
}

/**
 * pvrdma_query_qp - query a queue pair's attributes
 * @ibqp: the queue pair to query
 * @attr: the queue pair's attributes
 * @attr_mask: attributes mask
 * @init_attr: initial queue pair attributes
 *
 * @returns 0 on success, otherwise returns an errno.
 */
int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_qp_init_attr *init_attr)
{
	struct pvrdma_dev *dev = to_vdev(ibqp->device);
	struct pvrdma_qp *qp = to_vqp(ibqp);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
	struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
	int ret = 0;

	mutex_lock(&qp->mutex);

	if (qp->state == IB_QPS_RESET) {
		attr->qp_state = IB_QPS_RESET;
		goto out;
	}

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
	cmd->qp_handle = qp->qp_handle;
	cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);

	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not query queuepair, error: %d\n", ret);
		goto out;
	}

	attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
	attr->cur_qp_state =
		pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
	attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
	attr->path_mig_state =
		pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
	attr->qkey = resp->attrs.qkey;
	attr->rq_psn = resp->attrs.rq_psn;
	attr->sq_psn = resp->attrs.sq_psn;
	attr->dest_qp_num = resp->attrs.dest_qp_num;
	attr->qp_access_flags =
		pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
	attr->pkey_index = resp->attrs.pkey_index;
	attr->alt_pkey_index = resp->attrs.alt_pkey_index;
	attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
	attr->sq_draining = resp->attrs.sq_draining;
	attr->max_rd_atomic = resp->attrs.max_rd_atomic;
	attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
	attr->min_rnr_timer = resp->attrs.min_rnr_timer;
	attr->port_num = resp->attrs.port_num;
	attr->timeout = resp->attrs.timeout;
	attr->retry_cnt = resp->attrs.retry_cnt;
	attr->rnr_retry = resp->attrs.rnr_retry;
	attr->alt_port_num = resp->attrs.alt_port_num;
	attr->alt_timeout = resp->attrs.alt_timeout;
	pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
	pvrdma_ah_attr_to_rdma(&attr->ah_attr, &resp->attrs.ah_attr);
	pvrdma_ah_attr_to_rdma(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);

	qp->state = attr->qp_state;

	ret = 0;

out:
	attr->cur_qp_state = attr->qp_state;

	init_attr->event_handler = qp->ibqp.event_handler;
	init_attr->qp_context = qp->ibqp.qp_context;
	init_attr->send_cq = qp->ibqp.send_cq;
	init_attr->recv_cq = qp->ibqp.recv_cq;
	init_attr->srq = qp->ibqp.srq;
	init_attr->xrcd = NULL;
	init_attr->cap = attr->cap;
	init_attr->sq_sig_type = 0;
	init_attr->qp_type = qp->ibqp.qp_type;
	init_attr->create_flags = 0;
	init_attr->port_num = qp->port;

	mutex_unlock(&qp->mutex);

	return ret;
}