/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
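/* This file implements the ib_device verb callbacks for the rxe
 * soft-RoCE driver. Each rxe_* callback below converts the core ib_*
 * object to the matching rxe object (to_rdev(), to_rqp(), ...) and
 * operates on state allocated from the driver's object pools.
 */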
static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
				      u8 *active_width)
{
	if (speed <= 1000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
	} else if (speed <= 10000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_FDR10;
	} else if (speed <= 20000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_DDR;
	} else if (speed <= 30000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
	} else if (speed <= 40000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR10;
	} else {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
	}
}
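/* Port attributes live in rxe->port; the active speed and width
 * reported to the core are derived from the underlying netdev's
 * ethtool link settings via rxe_eth_speed_to_ib_speed() above.
 */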
static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	u32 speed;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		return -EINVAL;
	}

	port = &rxe->port;

	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	if (rxe->ndev->ethtool_ops->get_link_ksettings) {
		struct ethtool_link_ksettings ks;

		rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
		speed = ks.base.speed;
	} else if (rxe->ndev->ethtool_ops->get_settings) {
		struct ethtool_cmd cmd;

		rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
		speed = cmd.speed;
	} else {
		pr_warn("%s speed is unknown, defaulting to 1000\n",
			rxe->ndev->name);
		speed = 1000;
	}
	rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
				  &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

	return 0;
}
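/* GID handling: the GID table contents are maintained by the IB core
 * GID cache (see ib_get_cached_gid() below), so add_gid/del_gid only
 * need to validate the index against RXE_PORT_GID_TBL_LEN.
 */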
static int rxe_query_gid(struct ib_device *device,
			 u8 port_num, int index, union ib_gid *gid)
{
	int ret;

	if (index > RXE_PORT_GID_TBL_LEN)
		return -EINVAL;

	ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
	if (ret == -EAGAIN) {
		memcpy(gid, &zgid, sizeof(*gid));
		return 0;
	}

	return ret;
}

static int rxe_add_gid(struct ib_device *device, u8 port_num,
		       unsigned int index, const union ib_gid *gid,
		       const struct ib_gid_attr *attr, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static int rxe_del_gid(struct ib_device *device, u8 port_num,
		       unsigned int index, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}
static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dma_device, "invalid port_num = %d\n",
			 port_num);
		return -EINVAL;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dma_device, "invalid index = %d\n",
			 index);
		return -EINVAL;
	}

	*pkey = port->pkey_tbl[index];
	return 0;
}
static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		return -EINVAL;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe->ifc_ops->link_layer(rxe, port_num);
}
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	err = rxe_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}
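/* Address handle (AH) verbs. rxe_init_av() looks up the source GID for
 * the requested port and sgid_index and fills the driver's rxe_av from
 * the ib_ah_attr supplied by the caller.
 */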
static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
		       struct rxe_av *av)
{
	int err;
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	err = ib_get_cached_gid(&rxe->ib_dev, attr->port_num,
				attr->grh.sgid_index, &sgid,
				&sgid_attr);
	if (err) {
		pr_err("Failed to query sgid. err = %d\n", err);
		return err;
	}

	err = rxe_av_from_attr(rxe, attr->port_num, av, attr);
	if (!err)
		err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);

	if (sgid_attr.ndev)
		dev_put(sgid_attr.ndev);
	return err;
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return ERR_PTR(err);

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah)
		return ERR_PTR(-ENOMEM);

	rxe_add_ref(pd);
	ah->pd = pd;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err) {
		rxe_drop_ref(pd);
		rxe_drop_ref(ah);
		return ERR_PTR(err);
	}

	return &ah->ibah;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		return err;

	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	rxe_av_to_attr(rxe, &ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}
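/* Copy one ib_recv_wr into the next free slot of a receive queue ring.
 * Callers (rxe_post_srq_recv() and rxe_post_recv() below) hold the
 * queue's producer_lock while calling this.
 */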
static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}
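/* Shared receive queue (SRQ) verbs: create, modify, query and destroy,
 * plus posting receive work requests through post_one_recv() above.
 */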
static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		return ERR_PTR(err);

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq)
		return ERR_PTR(-ENOMEM);

	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, udata);
	if (err) {
		rxe_drop_ref(pd);
		rxe_drop_ref(srq);
		return ERR_PTR(err);
	}

	return &srq->ibsrq;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		return err;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
	if (err)
		return err;

	return 0;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_ref(srq);
	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		return ERR_PTR(err);

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
	if (err) {
		rxe_drop_ref(qp);
		return ERR_PTR(err);
	}

	return &qp->ibqp;
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		return err;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		return err;

	return 0;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_ref(qp);
	return 0;
}
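/* Send path helpers: validate_send_wr() sanity-checks a work request,
 * init_send_wr()/init_send_wqe() translate an ib_send_wr into the
 * driver's rxe_send_wqe layout, and post_one_send() places the result
 * on the send queue ring.
 */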
static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		return -EINVAL;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			return -EINVAL;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			return -EINVAL;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		return -EINVAL;

	return 0;
}
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through to pick up the rdma fields */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}
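/* init_send_wqe() builds the complete send WQE: the address vector for
 * UD-style QPs, either inlined payload or the scatter/gather list, and
 * the DMA bookkeeping fields.
 */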
static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			if (qp->is_user && copy_from_user(p, (__user void *)
					(uintptr_t)sge->addr, sge->length))
				return -EFAULT;
			else if (!qp->is_user)
				memcpy(p, (void *)(uintptr_t)sge->addr,
				       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else {
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));
	}

	wqe->iova = (mask & WR_ATOMIC_MASK) ?
			atomic_wr(ibwr)->remote_addr :
			rdma_wr(ibwr)->remote_addr;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}
static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}
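/* Walk a chain of send work requests, posting each one in turn; the
 * first failure is reported through *bad_wr and the walk stops there.
 */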
static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
				struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;
	int must_sched;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);
		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	/*
	 * Must schedule in the GSI QP case because ib_send_mad() holds an
	 * irq lock and the requester calls ip_local_out_sk(), which takes
	 * spin_lock_bh.
	 */
	must_sched = (qp_type(qp) == IB_QPT_GSI) ||
			(queue_count(qp->sq.queue) > 1);

	rxe_run_task(&qp->req.task, must_sched);

	return err;
}

static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	}

	return rxe_post_send_kernel(qp, wr, bad_wr);
}
static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	return err;
}
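/* Completion queue verbs. Completions sit in a queue ring; rxe_poll_cq()
 * copies finished entries out under cq->cq_lock, while rxe_peek_cq()
 * only reports how many are currently queued.
 */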
static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
	if (err)
		return ERR_PTR(err);

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, udata);
	if (err) {
		rxe_drop_ref(cq);
		return ERR_PTR(err);
	}

	return &cq->ibcq;
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
	if (err)
		return err;

	err = rxe_cq_resize_queue(cq, cqe, udata);
	if (err)
		return err;

	return 0;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	return 0;
}
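/* Memory region verbs: DMA, user and fast-registration MRs are all
 * backed by a struct rxe_mem allocated from the device's mr_pool.
 */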
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(rxe, pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);

	err = rxe_mem_init_user(rxe, pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}
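/* Multicast attach/detach: the QP is added to or removed from the
 * rxe_mc_grp that matches the multicast GID.
 */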
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t rxe_show_parent(struct device *device,
			       struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);
	char *name;

	name = rxe->ifc_ops->parent_name(rxe, 1);
	return snprintf(buf, 16, "%s\n", name);
}

static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};
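/* Fill in the ib_device, advertise the supported uverbs commands, wire
 * up the verb callbacks defined above, register with the IB core and
 * create the "parent" sysfs attribute.
 */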
int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
	dev->dma_device = rxe->ifc_ops->dma_device(rxe);
	dev->local_dma_lkey = 0;
	dev->node_guid = rxe->ifc_ops->node_guid(rxe);
	dev->dma_ops = &rxe_dma_mapping_ops;

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST);

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->query_gid = rxe_query_gid;
	dev->get_netdev = rxe_get_netdev;
	dev->add_gid = rxe_add_gid;
	dev->del_gid = rxe_del_gid;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;

	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("rxe_register_device failed, err = %d\n", err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("device_create_file failed, i = %d, err = %d\n",
				i, err);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	return err;
}
int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}