/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *     - Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer.
 *
 *     - Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials
 *       provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "rxe_queue.h"
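/*
 * rxe_verbs.c implements the ib_device verbs callbacks for the soft RoCE
 * (rxe) driver: device, port, GID and pkey queries, PD/AH/SRQ/QP/CQ/MR
 * management, the post_send/post_recv fast paths, and registration of the
 * device with the IB core.
 */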
static int rxe_query_device(struct ib_device *dev,
                            struct ib_device_attr *attr,
                            struct ib_udata *uhw)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        *attr = rxe->attr;
        return 0;
}
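/*
 * Map an ethtool link speed in Mb/s onto the closest IB active_speed /
 * active_width pair reported through query_port.
 */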
static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
                                      u8 *active_width)
{
        if (speed <= 1000) {
                *active_width = IB_WIDTH_1X;
                *active_speed = IB_SPEED_SDR;
        } else if (speed <= 10000) {
                *active_width = IB_WIDTH_1X;
                *active_speed = IB_SPEED_FDR10;
        } else if (speed <= 20000) {
                *active_width = IB_WIDTH_4X;
                *active_speed = IB_SPEED_DDR;
        } else if (speed <= 30000) {
                *active_width = IB_WIDTH_4X;
                *active_speed = IB_SPEED_QDR;
        } else if (speed <= 40000) {
                *active_width = IB_WIDTH_4X;
                *active_speed = IB_SPEED_FDR10;
        } else {
                *active_width = IB_WIDTH_4X;
                *active_speed = IB_SPEED_EDR;
        }
}
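/*
 * query_port reads the current link speed from the underlying netdev's
 * ethtool ops under usdev_lock, preferring the newer get_link_ksettings
 * call and falling back to the legacy get_settings, then converts the
 * result into the IB speed/width pair in ib_port_attr.
 */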
static int rxe_query_port(struct ib_device *dev,
                          u8 port_num, struct ib_port_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;
        u32 speed;

        if (unlikely(port_num != 1)) {
                pr_warn("invalid port_number %d\n", port_num);
                return -EINVAL;
        }

        port = &rxe->port;

        *attr = port->attr;

        mutex_lock(&rxe->usdev_lock);
        if (rxe->ndev->ethtool_ops->get_link_ksettings) {
                struct ethtool_link_ksettings ks;

                rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
                speed = ks.base.speed;
        } else if (rxe->ndev->ethtool_ops->get_settings) {
                struct ethtool_cmd cmd;

                rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
                speed = cmd.speed;
        } else {
                pr_warn("%s speed is unknown, defaulting to 1000\n",
                        rxe->ndev->name);
                speed = 1000;
        }
        rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
                                  &attr->active_width);
        mutex_unlock(&rxe->usdev_lock);

        return 0;
}
static int rxe_query_gid(struct ib_device *device,
                         u8 port_num, int index, union ib_gid *gid)
{
        int ret;

        if (index > RXE_PORT_GID_TBL_LEN)
                return -EINVAL;

        ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
        if (ret == -EAGAIN) {
                memcpy(gid, &zgid, sizeof(*gid));
                return 0;
        }

        return ret;
}

static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
                       index, const union ib_gid *gid,
                       const struct ib_gid_attr *attr, void **context)
{
        if (index >= RXE_PORT_GID_TBL_LEN)
                return -EINVAL;
        return 0;
}

static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
                       index, void **context)
{
        if (index >= RXE_PORT_GID_TBL_LEN)
                return -EINVAL;
        return 0;
}
static struct net_device *rxe_get_netdev(struct ib_device *device,
                                         u8 port_num)
{
        struct rxe_dev *rxe = to_rdev(device);

        if (rxe->ndev) {
                dev_hold(rxe->ndev);
                return rxe->ndev;
        }

        return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
                          u8 port_num, u16 index, u16 *pkey)
{
        struct rxe_dev *rxe = to_rdev(device);
        struct rxe_port *port;

        if (unlikely(port_num != 1)) {
                dev_warn(device->dma_device, "invalid port_num = %d\n",
                         port_num);
                return -EINVAL;
        }

        port = &rxe->port;

        if (unlikely(index >= port->attr.pkey_tbl_len)) {
                dev_warn(device->dma_device, "invalid index = %d\n",
                         index);
                return -EINVAL;
        }

        *pkey = port->pkey_tbl[index];
        return 0;
}
static int rxe_modify_device(struct ib_device *dev,
                             int mask, struct ib_device_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(rxe->ib_dev.node_desc,
                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
        }

        return 0;
}

static int rxe_modify_port(struct ib_device *dev,
                           u8 port_num, int mask, struct ib_port_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;

        if (unlikely(port_num != 1)) {
                pr_warn("invalid port_num = %d\n", port_num);
                return -EINVAL;
        }

        port = &rxe->port;

        port->attr.port_cap_flags |= attr->set_port_cap_mask;
        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

        if (mask & IB_PORT_RESET_QKEY_CNTR)
                port->attr.qkey_viol_cntr = 0;

        return 0;
}
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
                                               u8 port_num)
{
        struct rxe_dev *rxe = to_rdev(dev);

        return rxe->ifc_ops->link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
                                              struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_ucontext *uc;

        uc = rxe_alloc(&rxe->uc_pool);
        return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
        struct rxe_ucontext *uc = to_ruc(ibuc);

        rxe_drop_ref(uc);
        return 0;
}
static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
                              struct ib_port_immutable *immutable)
{
        int err;
        struct ib_port_attr attr;

        err = rxe_query_port(dev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
                                  struct ib_ucontext *context,
                                  struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_pd *pd;

        pd = rxe_alloc(&rxe->pd_pool);
        return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
        struct rxe_pd *pd = to_rpd(ibpd);

        rxe_drop_ref(pd);
        return 0;
}
static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
                       struct rxe_av *av)
{
        int err;
        union ib_gid sgid;
        struct ib_gid_attr sgid_attr;

        err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
                                attr->grh.sgid_index, &sgid,
                                &sgid_attr);
        if (err) {
                pr_err("Failed to query sgid. err = %d\n", err);
                return err;
        }

        err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
        if (!err)
                err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);

        if (sgid_attr.ndev)
                dev_put(sgid_attr.ndev);
        return err;
}
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_ah *ah;

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return ERR_PTR(err);

        ah = rxe_alloc(&rxe->ah_pool);
        if (!ah)
                return ERR_PTR(-ENOMEM);

        rxe_add_ref(pd);
        ah->pd = pd;

        err = rxe_init_av(rxe, attr, &ah->av);
        if (err) {
                rxe_drop_ref(pd);
                rxe_drop_ref(ah);
                return ERR_PTR(err);
        }

        return &ah->ibah;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return err;

        err = rxe_init_av(rxe, attr, &ah->av);

        return err;
}

static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        rxe_av_to_attr(rxe, &ah->av, attr);
        return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
        struct rxe_ah *ah = to_rah(ibah);

        rxe_drop_ref(ah->pd);
        rxe_drop_ref(ah);
        return 0;
}
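/*
 * Receive WQEs (for both QPs and SRQs) are posted by copying the caller's
 * scatter list into the slot at the producer index of the circular receive
 * queue and then advancing the producer pointer once the WQE is complete.
 */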
static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
        int i;
        u32 length = 0;
        struct rxe_recv_wqe *recv_wqe;
        int num_sge = ibwr->num_sge;

        if (unlikely(queue_full(rq->queue)))
                return -ENOMEM;

        if (unlikely(num_sge > rq->max_sge))
                return -EINVAL;

        for (i = 0; i < num_sge; i++)
                length += ibwr->sg_list[i].length;

        recv_wqe = producer_addr(rq->queue);
        recv_wqe->wr_id = ibwr->wr_id;
        recv_wqe->num_sge = num_sge;

        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
               num_sge * sizeof(struct ib_sge));

        recv_wqe->dma.length = length;
        recv_wqe->dma.resid = length;
        recv_wqe->dma.num_sge = num_sge;
        recv_wqe->dma.cur_sge = 0;
        recv_wqe->dma.sge_offset = 0;

        /* make sure all changes to the work queue are written before we
         * update the producer pointer
         */
        smp_wmb();

        advance_producer(rq->queue);
        return 0;
}
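/*
 * SRQ verbs: the SRQ owns its own receive queue.  Attributes are validated
 * with rxe_srq_chk_attr() before the queue is created or resized, and udata
 * is passed through so user-space queues can be set up as well.
 */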
static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
                                     struct ib_srq_init_attr *init,
                                     struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_srq *srq;
        struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
        if (err)
                return ERR_PTR(err);

        srq = rxe_alloc(&rxe->srq_pool);
        if (!srq)
                return ERR_PTR(-ENOMEM);

        rxe_add_ref(pd);
        srq->pd = pd;

        err = rxe_srq_from_init(rxe, srq, init, context, udata);
        if (err) {
                rxe_drop_ref(pd);
                rxe_drop_ref(srq);
                return ERR_PTR(err);
        }

        return &srq->ibsrq;
}
static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                          enum ib_srq_attr_mask mask,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_dev *rxe = to_rdev(ibsrq->device);

        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
        if (err)
                return err;

        err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);

        return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        attr->max_wr = srq->rq.queue->buf->index_mask;
        attr->max_sge = srq->rq.max_sge;
        attr->srq_limit = srq->limit;
        return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        rxe_queue_cleanup(srq->rq.queue);

        rxe_drop_ref(srq->pd);
        rxe_drop_ref(srq);
        return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                             struct ib_recv_wr **bad_wr)
{
        int err = 0;
        unsigned long flags;
        struct rxe_srq *srq = to_rsrq(ibsrq);

        spin_lock_irqsave(&srq->rq.producer_lock, flags);

        while (wr) {
                err = post_one_recv(&srq->rq, wr);
                if (unlikely(err))
                        break;
                wr = wr->next;
        }

        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

        if (err)
                *bad_wr = wr;

        return err;
}
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
                                   struct ib_qp_init_attr *init,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_qp *qp;

        err = rxe_qp_chk_init(rxe, init);
        if (err)
                return ERR_PTR(err);

        qp = rxe_alloc(&rxe->qp_pool);
        if (!qp)
                return ERR_PTR(-ENOMEM);

        err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
        if (err) {
                rxe_drop_ref(qp);
                return ERR_PTR(err);
        }

        return &qp->ibqp;
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
        if (err)
                return err;

        err = rxe_qp_from_attr(qp, attr, mask, udata);

        return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        int mask, struct ib_qp_init_attr *init)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_to_init(qp, init);
        rxe_qp_to_attr(qp, attr, mask);

        return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_destroy(qp);
        rxe_drop_ref(qp);
        return 0;
}
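/*
 * Send path.  validate_send_wr() checks a work request against the send
 * queue limits (SGE count, 8-byte alignment for atomics, inline data
 * length); init_send_wr() then copies the ib_send_wr into the driver's
 * rxe_send_wr, where IB_WR_RDMA_WRITE_WITH_IMM deliberately falls through
 * to the RDMA cases to pick up remote_addr and rkey.
 */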
static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                            unsigned int mask, unsigned int length)
{
        int num_sge = ibwr->num_sge;
        struct rxe_sq *sq = &qp->sq;

        if (unlikely(num_sge > sq->max_sge))
                return -EINVAL;

        if (unlikely(mask & WR_ATOMIC_MASK)) {
                if (length < 8)
                        return -EINVAL;

                if (atomic_wr(ibwr)->remote_addr & 0x7)
                        return -EINVAL;
        }

        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
                     (length > sq->max_inline)))
                return -EINVAL;

        return 0;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
                         struct ib_send_wr *ibwr)
{
        wr->wr_id = ibwr->wr_id;
        wr->num_sge = ibwr->num_sge;
        wr->opcode = ibwr->opcode;
        wr->send_flags = ibwr->send_flags;

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI) {
                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
                if (qp_type(qp) == IB_QPT_GSI)
                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
                if (wr->opcode == IB_WR_SEND_WITH_IMM)
                        wr->ex.imm_data = ibwr->ex.imm_data;
        } else {
                switch (wr->opcode) {
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_WRITE:
                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
                        wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
                        break;
                case IB_WR_SEND_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        break;
                case IB_WR_SEND_WITH_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        wr->wr.atomic.remote_addr =
                                atomic_wr(ibwr)->remote_addr;
                        wr->wr.atomic.compare_add =
                                atomic_wr(ibwr)->compare_add;
                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
                        break;
                case IB_WR_LOCAL_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_REG_MR:
                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
                        wr->wr.reg.key = reg_wr(ibwr)->key;
                        wr->wr.reg.access = reg_wr(ibwr)->access;
                        break;
                default:
                        break;
                }
        }
}
static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                         unsigned int mask, unsigned int length,
                         struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;
        struct ib_sge *sge;
        int i;
        u8 *p;

        init_send_wr(qp, &wqe->wr, ibwr);

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI)
                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

        if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
                p = wqe->dma.inline_data;

                sge = ibwr->sg_list;
                for (i = 0; i < num_sge; i++, sge++) {
                        if (qp->is_user && copy_from_user(p, (__user void *)
                                            (uintptr_t)sge->addr, sge->length))
                                return -EFAULT;
                        else if (!qp->is_user)
                                memcpy(p, (void *)(uintptr_t)sge->addr,
                                       sge->length);

                        p += sge->length;
                }
        } else if (mask & WR_REG_MASK) {
                wqe->state = wqe_state_posted;
                return 0;
        } else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ib_sge));

        wqe->iova = (mask & WR_ATOMIC_MASK) ?
                        atomic_wr(ibwr)->remote_addr :
                        rdma_wr(ibwr)->remote_addr;

        wqe->dma.length = length;
        wqe->dma.resid = length;
        wqe->dma.num_sge = num_sge;
        wqe->dma.cur_sge = 0;
        wqe->dma.sge_offset = 0;
        wqe->state = wqe_state_posted;
        wqe->ssn = atomic_add_return(1, &qp->ssn);

        return 0;
}
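/*
 * post_one_send() runs under sq_lock: it validates the request, builds a
 * rxe_send_wqe in the slot at the producer index of the send queue, and
 * only advances the producer pointer once the WQE is fully written, so the
 * requester task never sees a partially initialized entry.
 */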
static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                         unsigned mask, u32 length)
{
        int err;
        struct rxe_sq *sq = &qp->sq;
        struct rxe_send_wqe *send_wqe;
        unsigned long flags;

        err = validate_send_wr(qp, ibwr, mask, length);
        if (err)
                return err;

        spin_lock_irqsave(&qp->sq.sq_lock, flags);

        if (unlikely(queue_full(sq->queue))) {
                err = -ENOMEM;
                goto err1;
        }

        send_wqe = producer_addr(sq->queue);

        err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
        if (unlikely(err))
                goto err1;

        /*
         * make sure all changes to the work queue are
         * written before we update the producer pointer
         */
        smp_wmb();

        advance_producer(sq->queue);
        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

        return 0;

err1:
        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
        return err;
}
static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         struct ib_send_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        unsigned int mask;
        unsigned int length = 0;
        int i;
        int must_sched;

        if (unlikely(!qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->req.state < QP_STATE_READY)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
                if (unlikely(!mask)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
                             !(mask & WR_INLINE_MASK))) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                length = 0;
                for (i = 0; i < wr->num_sge; i++)
                        length += wr->sg_list[i].length;

                err = post_one_send(qp, wr, mask, length);
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        /*
         * Must schedule in the GSI QP case because ib_send_mad() holds the
         * irq lock, and the requester calls ip_local_out_sk(), which takes
         * spin_lock_bh.
         */
        must_sched = (qp_type(qp) == IB_QPT_GSI) ||
                        (queue_count(qp->sq.queue) > 1);

        rxe_run_task(&qp->req.task, must_sched);

        return err;
}

static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                         struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_rq *rq = &qp->rq;
        unsigned long flags;

        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->srq)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        spin_lock_irqsave(&rq->producer_lock, flags);

        while (wr) {
                err = post_one_recv(rq, wr);
                if (unlikely(err)) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&rq->producer_lock, flags);

        return err;
}
static struct ib_cq *rxe_create_cq(struct ib_device *dev,
                                   const struct ib_cq_init_attr *attr,
                                   struct ib_ucontext *context,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_cq *cq;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
        if (err)
                return ERR_PTR(err);

        cq = rxe_alloc(&rxe->cq_pool);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
                               context, udata);
        if (err) {
                rxe_drop_ref(cq);
                return ERR_PTR(err);
        }

        return &cq->ibcq;
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        rxe_drop_ref(cq);
        return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        int err;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_dev *rxe = to_rdev(ibcq->device);

        err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
        if (err)
                return err;

        err = rxe_cq_resize_queue(cq, cqe, udata);

        return err;
}
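/*
 * Polling copies up to num_entries completions from the head of the
 * completion queue into the caller's ib_wc array, advancing the consumer
 * index for each entry returned.
 */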
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        int i;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_cqe *cqe;
        unsigned long flags;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (i = 0; i < num_entries; i++) {
                cqe = queue_head(cq->queue);
                if (!cqe)
                        break;

                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
                advance_consumer(cq->queue);
        }
        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int count = queue_count(cq->queue);

        return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = flags & IB_CQ_SOLICITED_MASK;

        return 0;
}
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_add_ref(pd);

        err = rxe_mem_init_dma(rxe, pd, access, mr);
        if (err) {
                rxe_drop_ref(pd);
                rxe_drop_ref(mr);
                return ERR_PTR(err);
        }

        return &mr->ibmr;
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
                                     u64 start,
                                     u64 length,
                                     u64 iova,
                                     int access, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_add_ref(pd);

        err = rxe_mem_init_user(rxe, pd, start, length, iova,
                                access, udata, mr);
        if (err) {
                rxe_drop_ref(pd);
                rxe_drop_ref(mr);
                return ERR_PTR(err);
        }

        return &mr->ibmr;
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
        struct rxe_mem *mr = to_rmr(ibmr);

        mr->state = RXE_MEM_STATE_ZOMBIE;
        rxe_drop_ref(mr->pd);
        rxe_drop_ref(mr);
        return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
                                  enum ib_mr_type mr_type,
                                  u32 max_num_sg)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        rxe_add_ref(pd);

        err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
        if (err) {
                rxe_drop_ref(pd);
                rxe_drop_ref(mr);
                return ERR_PTR(err);
        }

        return &mr->ibmr;
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mem *mr = to_rmr(ibmr);
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(mr->nbuf == mr->num_buf))
                return -ENOMEM;

        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        mr->nbuf++;

        return 0;
}
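/*
 * For fast-registration MRs, ib_sg_to_pages() walks the scatterlist and
 * calls rxe_set_page() once per page to fill the MR's page map; the MR
 * geometry (iova, length, page size and offset) is then copied from the
 * ib_mr that the core has already populated.
 */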
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                         unsigned int *sg_offset)
{
        struct rxe_mem *mr = to_rmr(ibmr);
        int n;

        mr->nbuf = 0;

        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

        mr->va = ibmr->iova;
        mr->iova = ibmr->iova;
        mr->length = ibmr->length;
        mr->page_shift = ilog2(ibmr->page_size);
        mr->page_mask = ibmr->page_size - 1;
        mr->offset = mr->iova & mr->page_mask;

        return n;
}
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_mc_grp *grp;

        /* takes a ref on grp if successful */
        err = rxe_mcast_get_grp(rxe, mgid, &grp);
        if (err)
                return err;

        err = rxe_mcast_add_grp_elem(rxe, qp, grp);

        rxe_drop_ref(grp);
        return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t rxe_show_parent(struct device *device,
                               struct device_attribute *attr, char *buf)
{
        struct rxe_dev *rxe = container_of(device, struct rxe_dev,
                                           ib_dev.dev);
        char *name;

        name = rxe->ifc_ops->parent_name(rxe, 1);
        return snprintf(buf, 16, "%s\n", name);
}

static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);

static struct device_attribute *rxe_dev_attributes[] = {
        &dev_attr_parent,
};
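/*
 * rxe_register_device() fills in the ib_device identity fields, the mask of
 * supported user verbs commands and the verbs callback table defined above,
 * registers the device with the IB core, and creates the sysfs "parent"
 * attribute that reports the underlying network device.
 */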
int rxe_register_device(struct rxe_dev *rxe)
{
        int err;
        int i;
        struct ib_device *dev = &rxe->ib_dev;

        strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

        dev->owner = THIS_MODULE;
        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
        dev->dma_device = rxe->ifc_ops->dma_device(rxe);
        dev->local_dma_lkey = 0;
        dev->node_guid = rxe->ifc_ops->node_guid(rxe);
        dev->dma_ops = &rxe_dma_mapping_ops;

        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
            | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
            | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
            | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
            | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
            ;

        dev->query_device = rxe_query_device;
        dev->modify_device = rxe_modify_device;
        dev->query_port = rxe_query_port;
        dev->modify_port = rxe_modify_port;
        dev->get_link_layer = rxe_get_link_layer;
        dev->query_gid = rxe_query_gid;
        dev->get_netdev = rxe_get_netdev;
        dev->add_gid = rxe_add_gid;
        dev->del_gid = rxe_del_gid;
        dev->query_pkey = rxe_query_pkey;
        dev->alloc_ucontext = rxe_alloc_ucontext;
        dev->dealloc_ucontext = rxe_dealloc_ucontext;
        dev->mmap = rxe_mmap;
        dev->get_port_immutable = rxe_port_immutable;
        dev->alloc_pd = rxe_alloc_pd;
        dev->dealloc_pd = rxe_dealloc_pd;
        dev->create_ah = rxe_create_ah;
        dev->modify_ah = rxe_modify_ah;
        dev->query_ah = rxe_query_ah;
        dev->destroy_ah = rxe_destroy_ah;
        dev->create_srq = rxe_create_srq;
        dev->modify_srq = rxe_modify_srq;
        dev->query_srq = rxe_query_srq;
        dev->destroy_srq = rxe_destroy_srq;
        dev->post_srq_recv = rxe_post_srq_recv;
        dev->create_qp = rxe_create_qp;
        dev->modify_qp = rxe_modify_qp;
        dev->query_qp = rxe_query_qp;
        dev->destroy_qp = rxe_destroy_qp;
        dev->post_send = rxe_post_send;
        dev->post_recv = rxe_post_recv;
        dev->create_cq = rxe_create_cq;
        dev->destroy_cq = rxe_destroy_cq;
        dev->resize_cq = rxe_resize_cq;
        dev->poll_cq = rxe_poll_cq;
        dev->peek_cq = rxe_peek_cq;
        dev->req_notify_cq = rxe_req_notify_cq;
        dev->get_dma_mr = rxe_get_dma_mr;
        dev->reg_user_mr = rxe_reg_user_mr;
        dev->dereg_mr = rxe_dereg_mr;
        dev->alloc_mr = rxe_alloc_mr;
        dev->map_mr_sg = rxe_map_mr_sg;
        dev->attach_mcast = rxe_attach_mcast;
        dev->detach_mcast = rxe_detach_mcast;

        err = ib_register_device(dev, NULL);
        if (err) {
                pr_warn("rxe_register_device failed, err = %d\n", err);
                goto err1;
        }

        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
                err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
                if (err) {
                        pr_warn("device_create_file failed, i = %d, err = %d\n",
                                i, err);
                        goto err2;
                }
        }

        return 0;

err2:
        ib_unregister_device(dev);
err1:
        return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
        int i;
        struct ib_device *dev = &rxe->ib_dev;

        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
                device_remove_file(&dev->dev, rxe_dev_attributes[i]);

        ib_unregister_device(dev);

        return 0;
}