/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

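/*
 * rxe_verbs.c - ib_device verb callbacks for the soft-RoCE (rxe) driver.
 *
 * Each function below implements one entry point of the RDMA verbs API on
 * top of the software data path: objects (PDs, AHs, QPs, CQs, MRs, ...) are
 * taken from per-device memory pools with rxe_alloc()/rxe_drop_ref(), and
 * work/completion queues are plain circular buffers.
 */
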
static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	int rc = -EINVAL;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		goto out;
	}

	port = &rxe->port;

	/* *attr being zeroed by the caller, avoid zeroing it here */
	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

out:
	return rc;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dev.parent, "invalid port_num = %d\n",
			 port_num);
		goto err1;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err1:
	return -EINVAL;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}

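/*
 * Object allocation below follows a common pattern: take an element from the
 * per-device pool and return the embedded ib_* structure, or
 * ERR_PTR(-ENOMEM) when the pool is exhausted.  The matching dealloc verb
 * simply drops the pool reference.
 */
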
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
			struct rxe_av *av)
{
	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
	rxe_av_fill_ip_info(av, attr);
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
				   struct rdma_ah_attr *attr,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return ERR_PTR(err);

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	rxe_add_ref(pd);
	ah->pd = pd;

	rxe_init_av(rxe, attr, &ah->av);
	return &ah->ibah;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	rxe_init_av(rxe, attr, &ah->av);
	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

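/*
 * post_one_recv() copies a single ib_recv_wr into the next free slot of the
 * receive queue ring: the scatter list is copied verbatim into the WQE's DMA
 * state and the total length is pre-computed so the responder can track
 * residual bytes as data arrives.
 */
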
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length		= length;
	recv_wqe->dma.resid		= length;
	recv_wqe->dma.num_sge		= num_sge;
	recv_wqe->dma.cur_sge		= 0;
	recv_wqe->dma.sge_offset	= 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

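/*
 * SRQ verbs.  When a user-space buffer is supplied through ib_udata, its
 * size is validated against the rxe ABI structures (rxe_create_srq_resp /
 * rxe_modify_srq_cmd) before any object state is touched.
 */
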
static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

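/*
 * QP verbs.  Creation validates the init attributes, allocates a QP from the
 * pool and hands the heavy lifting (queue allocation, state machine setup)
 * to rxe_qp_from_init(); modify/query are thin wrappers around
 * rxe_qp_from_attr() and rxe_qp_to_attr().
 */
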
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
	if (err)
		goto err2;

	return &qp->ibqp;

err2:
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

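/*
 * init_send_wr() translates the kernel ib_send_wr into the driver's
 * rxe_send_wr representation.  UD-style QPs (UD/SMI/GSI) carry the remote
 * QPN/Q_Key; connected QPs instead decode the opcode-specific union
 * (RDMA, atomic, local invalidate, memory registration).
 */
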
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey	= rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

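/*
 * init_send_wqe() finishes building the send WQE.  Inline sends copy the
 * payload bytes directly into wqe->dma.inline_data; everything else keeps a
 * copy of the SGE list and lets the requester walk it later.  Note that
 * sge->addr is a virtual address cast through uintptr_t, not a bus address,
 * since rxe runs on dma_virt_ops.
 */
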
static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask		= mask;
	wqe->dma.length		= length;
	wqe->dma.resid		= length;
	wqe->dma.num_sge	= num_sge;
	wqe->dma.cur_sge	= 0;
	wqe->dma.sge_offset	= 0;
	wqe->state		= wqe_state_posted;
	wqe->ssn		= atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

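/*
 * rxe_post_send_kernel() is the path used for kernel ULPs: each WR on the
 * chain is validated, copied into the send queue, and then the requester
 * task is kicked.  A minimal kernel consumer would look roughly like the
 * sketch below (illustrative only, not taken from this file; QP setup and
 * error handling omitted):
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ib_send_wr wr = {
 *		.opcode     = IB_WR_SEND,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	const struct ib_send_wr *bad_wr;
 *
 *	ret = ib_post_send(qp, &wr, &bad_wr);
 */
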
static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);

		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}

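/*
 * CQ verbs.  The completion queue is another circular buffer; poll and peek
 * operate on it under cq->cq_lock, and req_notify only records the requested
 * notification mode for the completer to act on.
 */
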
static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, uresp);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_cq_disable(cq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

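/*
 * rxe_map_mr_sg() uses ib_sg_to_pages() with rxe_set_page() as the per-page
 * callback to fill the MR's page map, then caches the iova, length and page
 * geometry so the data path can translate offsets later.
 */
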
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};

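/*
 * rxe_register_device() fills in the ib_device structure directly: identity
 * fields, DMA setup via dma_virt_ops, the uverbs command mask, and every
 * verb callback defined above, plus a crc32 shash used for ICRC generation,
 * before registering with the IB core and creating the "parent" sysfs
 * attribute that names the underlying net_device.
 */
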
int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;
	struct crypto_shash *tfm;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;
	dma_coerce_mask_and_coherent(&dev->dev,
				     dma_get_required_mask(&dev->dev));

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->get_netdev = rxe_get_netdev;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;
	dev->get_hw_stats = rxe_ib_get_hw_stats;
	dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}
	rxe->tfm = tfm;

	dev->driver_id = RDMA_DRIVER_RXE;
	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("%s failed with error %d\n", __func__, err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("%s failed with error %d for attr number %d\n",
				__func__, err, i);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	crypto_free_shash(rxe->tfm);

	return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}