/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"
static int rxe_query_device(struct ib_device *dev,
                            struct ib_device_attr *attr,
                            struct ib_udata *uhw)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (uhw->inlen || uhw->outlen)
                return -EINVAL;

        *attr = rxe->attr;
        return 0;
}
static int rxe_query_port(struct ib_device *dev,
                          u8 port_num, struct ib_port_attr *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;
        int rc = -EINVAL;

        if (unlikely(port_num != 1)) {
                pr_warn("invalid port_number %d\n", port_num);
                goto out;
        }

        port = &rxe->port;

        /* *attr being zeroed by the caller, avoid zeroing it here */
        *attr = port->attr;

        mutex_lock(&rxe->usdev_lock);
        rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
                              &attr->active_width);
        mutex_unlock(&rxe->usdev_lock);

out:
        return rc;
}
static int rxe_query_gid(struct ib_device *device,
                         u8 port_num, int index, union ib_gid *gid)
{
        int ret;

        if (index > RXE_PORT_GID_TBL_LEN)
                return -EINVAL;

        ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
        if (ret == -EAGAIN) {
                memcpy(gid, &zgid, sizeof(*gid));
                return 0;
        }

        return ret;
}
static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
                       index, const union ib_gid *gid,
                       const struct ib_gid_attr *attr, void **context)
{
        if (index >= RXE_PORT_GID_TBL_LEN)
                return -EINVAL;
        return 0;
}
static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
                       index, void **context)
{
        if (index >= RXE_PORT_GID_TBL_LEN)
                return -EINVAL;
        return 0;
}
static struct net_device *rxe_get_netdev(struct ib_device *device,
                                         u8 port_num)
{
        struct rxe_dev *rxe = to_rdev(device);

        if (rxe->ndev) {
                dev_hold(rxe->ndev);
                return rxe->ndev;
        }

        return NULL;
}
static int rxe_query_pkey(struct ib_device *device,
                          u8 port_num, u16 index, u16 *pkey)
{
        struct rxe_dev *rxe = to_rdev(device);
        struct rxe_port *port;

        if (unlikely(port_num != 1)) {
                dev_warn(device->dev.parent, "invalid port_num = %d\n",
                         port_num);
                goto err1;
        }

        port = &rxe->port;

        if (unlikely(index >= port->attr.pkey_tbl_len)) {
                dev_warn(device->dev.parent, "invalid index = %d\n",
                         index);
                goto err1;
        }

        *pkey = port->pkey_tbl[index];
        return 0;

err1:
        return -EINVAL;
}
static int rxe_modify_device(struct ib_device *dev,
                             int mask, struct ib_device_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);

        if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
                rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

        if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
                memcpy(rxe->ib_dev.node_desc,
                       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
        }

        return 0;
}
static int rxe_modify_port(struct ib_device *dev,
                           u8 port_num, int mask, struct ib_port_modify *attr)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_port *port;

        if (unlikely(port_num != 1)) {
                pr_warn("invalid port_num = %d\n", port_num);
                goto err1;
        }

        port = &rxe->port;

        port->attr.port_cap_flags |= attr->set_port_cap_mask;
        port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

        if (mask & IB_PORT_RESET_QKEY_CNTR)
                port->attr.qkey_viol_cntr = 0;

        return 0;

err1:
        return -EINVAL;
}
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
                                               u8 port_num)
{
        struct rxe_dev *rxe = to_rdev(dev);

        return rxe_link_layer(rxe, port_num);
}
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
                                              struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_ucontext *uc;

        uc = rxe_alloc(&rxe->uc_pool);
        return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}
static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
        struct rxe_ucontext *uc = to_ruc(ibuc);

        rxe_drop_ref(uc);
        return 0;
}
static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
                              struct ib_port_immutable *immutable)
{
        int err;
        struct ib_port_attr attr;

        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

        err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;

        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;

        return 0;
}
static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
                                  struct ib_ucontext *context,
                                  struct ib_udata *udata)
{
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_pd *pd;

        pd = rxe_alloc(&rxe->pd_pool);
        return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}
static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
        struct rxe_pd *pd = to_rpd(ibpd);

        rxe_drop_ref(pd);
        return 0;
}
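/* Resolve the source GID for an address handle and fill in the driver's
 * address vector: the cached GID for the AH's port and sgid_index is
 * looked up, the rdma_ah_attr is converted into the rxe AV, and the IP
 * addressing info is derived from the GID attributes. The netdev
 * reference taken by the GID lookup is released before returning.
 */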
static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
                       struct rxe_av *av)
{
        int err;
        union ib_gid sgid;
        struct ib_gid_attr sgid_attr;

        err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
                                rdma_ah_read_grh(attr)->sgid_index, &sgid,
                                &sgid_attr);
        if (err) {
                pr_err("Failed to query sgid. err = %d\n", err);
                return err;
        }

        rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
        rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);

        if (sgid_attr.ndev)
                dev_put(sgid_attr.ndev);
        return 0;
}
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
                                   struct rdma_ah_attr *attr,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_ah *ah;

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                goto err1;

        ah = rxe_alloc(&rxe->ah_pool);
        if (!ah) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_ref(pd);
        ah->pd = pd;

        err = rxe_init_av(rxe, attr, &ah->av);
        if (err)
                goto err2;

        return &ah->ibah;

err2:
        rxe_drop_ref(pd);
        rxe_drop_ref(ah);
err1:
        return ERR_PTR(err);
}
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibah->device);
        struct rxe_ah *ah = to_rah(ibah);

        err = rxe_av_chk_attr(rxe, attr);
        if (err)
                return err;

        err = rxe_init_av(rxe, attr, &ah->av);
        if (err)
                return err;

        return 0;
}
static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
        struct rxe_ah *ah = to_rah(ibah);

        memset(attr, 0, sizeof(*attr));
        attr->type = ibah->type;
        rxe_av_to_attr(&ah->av, attr);

        return 0;
}
static int rxe_destroy_ah(struct ib_ah *ibah)
{
        struct rxe_ah *ah = to_rah(ibah);

        rxe_drop_ref(ah->pd);
        rxe_drop_ref(ah);
        return 0;
}
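/* Copy one ib_recv_wr into the receive queue ring. The caller holds the
 * queue's producer_lock. The scatter list is copied into the WQE and the
 * producer index is only advanced after a write barrier, so a consumer
 * never observes a partially written WQE.
 */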
static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
        int err;
        int i;
        u32 length;
        struct rxe_recv_wqe *recv_wqe;
        int num_sge = ibwr->num_sge;

        if (unlikely(queue_full(rq->queue))) {
                err = -ENOMEM;
                goto err1;
        }

        if (unlikely(num_sge > rq->max_sge)) {
                err = -EINVAL;
                goto err1;
        }

        length = 0;
        for (i = 0; i < num_sge; i++)
                length += ibwr->sg_list[i].length;

        recv_wqe = producer_addr(rq->queue);
        recv_wqe->wr_id = ibwr->wr_id;
        recv_wqe->num_sge = num_sge;

        memcpy(recv_wqe->dma.sge, ibwr->sg_list,
               num_sge * sizeof(struct ib_sge));

        recv_wqe->dma.length = length;
        recv_wqe->dma.resid = length;
        recv_wqe->dma.num_sge = num_sge;
        recv_wqe->dma.cur_sge = 0;
        recv_wqe->dma.sge_offset = 0;

        /* make sure all changes to the work queue are written before we
         * update the producer pointer
         */
        smp_wmb();

        advance_producer(rq->queue);
        return 0;

err1:
        return err;
}
static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
                                     struct ib_srq_init_attr *init,
                                     struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_srq *srq;
        struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

        err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
        if (err)
                goto err1;

        srq = rxe_alloc(&rxe->srq_pool);
        if (!srq) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(srq);
        rxe_add_ref(pd);
        srq->pd = pd;

        err = rxe_srq_from_init(rxe, srq, init, context, udata);
        if (err)
                goto err2;

        return &srq->ibsrq;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(srq);
        rxe_drop_ref(srq);
err1:
        return ERR_PTR(err);
}
static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
                          enum ib_srq_attr_mask mask,
                          struct ib_udata *udata)
{
        int err;
        struct rxe_srq *srq = to_rsrq(ibsrq);
        struct rxe_dev *rxe = to_rdev(ibsrq->device);

        err = rxe_srq_chk_attr(rxe, srq, attr, mask);
        if (err)
                goto err1;

        err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->err)
                return -EINVAL;

        attr->max_wr = srq->rq.queue->buf->index_mask;
        attr->max_sge = srq->rq.max_sge;
        attr->srq_limit = srq->limit;
        return 0;
}
static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
        struct rxe_srq *srq = to_rsrq(ibsrq);

        if (srq->rq.queue)
                rxe_queue_cleanup(srq->rq.queue);

        rxe_drop_ref(srq->pd);
        rxe_drop_index(srq);
        rxe_drop_ref(srq);

        return 0;
}
static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                             struct ib_recv_wr **bad_wr)
{
        int err = 0;
        unsigned long flags;
        struct rxe_srq *srq = to_rsrq(ibsrq);

        spin_lock_irqsave(&srq->rq.producer_lock, flags);

        while (wr) {
                err = post_one_recv(&srq->rq, wr);
                if (unlikely(err))
                        break;
                wr = wr->next;
        }

        spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

        if (err)
                *bad_wr = wr;

        return err;
}
static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
                                   struct ib_qp_init_attr *init,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_qp *qp;

        err = rxe_qp_chk_init(rxe, init);
        if (err)
                goto err1;

        qp = rxe_alloc(&rxe->qp_pool);
        if (!qp) {
                err = -ENOMEM;
                goto err1;
        }

        if (udata)
                qp->is_user = 1;

        rxe_add_index(qp);

        err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
        if (err)
                goto err2;

        return &qp->ibqp;

err2:
        rxe_drop_index(qp);
        rxe_drop_ref(qp);
err1:
        return ERR_PTR(err);
}
static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                         int mask, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        err = rxe_qp_chk_attr(rxe, qp, attr, mask);
        if (err)
                goto err1;

        err = rxe_qp_from_attr(qp, attr, mask, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}
static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                        int mask, struct ib_qp_init_attr *init)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_to_init(qp, init);
        rxe_qp_to_attr(qp, attr, mask);

        return 0;
}
static int rxe_destroy_qp(struct ib_qp *ibqp)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        rxe_qp_destroy(qp);
        rxe_drop_index(qp);
        rxe_drop_ref(qp);
        return 0;
}
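/* Sanity-check a send work request before it is copied into the send
 * queue: the number of SGEs must fit the queue, atomic operations must
 * target an 8-byte aligned address, and inline data must fit within the
 * queue's max_inline limit.
 */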
static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                            unsigned int mask, unsigned int length)
{
        int num_sge = ibwr->num_sge;
        struct rxe_sq *sq = &qp->sq;

        if (unlikely(num_sge > sq->max_sge))
                goto err1;

        if (unlikely(mask & WR_ATOMIC_MASK)) {
                if (length < 8)
                        goto err1;

                if (atomic_wr(ibwr)->remote_addr & 0x7)
                        goto err1;
        }

        if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
                     (length > sq->max_inline)))
                goto err1;

        return 0;

err1:
        return -EINVAL;
}
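/* Translate the generic ib_send_wr into the driver's rxe_send_wr: the
 * common fields are copied first, then the opcode-specific union members
 * (UD addressing, RDMA remote address/rkey, atomic arguments, invalidate
 * rkey or memory registration info).
 */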
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
                         struct ib_send_wr *ibwr)
{
        wr->wr_id = ibwr->wr_id;
        wr->num_sge = ibwr->num_sge;
        wr->opcode = ibwr->opcode;
        wr->send_flags = ibwr->send_flags;

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI) {
                wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
                wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
                if (qp_type(qp) == IB_QPT_GSI)
                        wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
                if (wr->opcode == IB_WR_SEND_WITH_IMM)
                        wr->ex.imm_data = ibwr->ex.imm_data;
        } else {
                switch (wr->opcode) {
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        /* fall through */
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_WRITE:
                        wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
                        wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
                        break;
                case IB_WR_SEND_WITH_IMM:
                        wr->ex.imm_data = ibwr->ex.imm_data;
                        break;
                case IB_WR_SEND_WITH_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        wr->wr.atomic.remote_addr =
                                atomic_wr(ibwr)->remote_addr;
                        wr->wr.atomic.compare_add =
                                atomic_wr(ibwr)->compare_add;
                        wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
                        wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
                        break;
                case IB_WR_LOCAL_INV:
                        wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
                        break;
                case IB_WR_REG_MR:
                        wr->wr.reg.mr = reg_wr(ibwr)->mr;
                        wr->wr.reg.key = reg_wr(ibwr)->key;
                        wr->wr.reg.access = reg_wr(ibwr)->access;
                        break;
                default:
                        break;
                }
        }
}
static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                         unsigned int mask, unsigned int length,
                         struct rxe_send_wqe *wqe)
{
        int num_sge = ibwr->num_sge;
        struct ib_sge *sge;
        int i;
        u8 *p;

        init_send_wr(qp, &wqe->wr, ibwr);

        if (qp_type(qp) == IB_QPT_UD ||
            qp_type(qp) == IB_QPT_SMI ||
            qp_type(qp) == IB_QPT_GSI)
                memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

        if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
                p = wqe->dma.inline_data;

                sge = ibwr->sg_list;
                for (i = 0; i < num_sge; i++, sge++) {
                        memcpy(p, (void *)(uintptr_t)sge->addr,
                               sge->length);

                        p += sge->length;
                }
        } else if (mask & WR_REG_MASK) {
                wqe->mask = mask;
                wqe->state = wqe_state_posted;
                return 0;
        } else
                memcpy(wqe->dma.sge, ibwr->sg_list,
                       num_sge * sizeof(struct ib_sge));

        wqe->iova = (mask & WR_ATOMIC_MASK) ?
                atomic_wr(ibwr)->remote_addr :
                rdma_wr(ibwr)->remote_addr;
        wqe->mask = mask;
        wqe->dma.length = length;
        wqe->dma.resid = length;
        wqe->dma.num_sge = num_sge;
        wqe->dma.cur_sge = 0;
        wqe->dma.sge_offset = 0;
        wqe->state = wqe_state_posted;
        wqe->ssn = atomic_add_return(1, &qp->ssn);

        return 0;
}
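/* Validate one send WR, reserve a slot in the send queue under sq_lock,
 * fill in the WQE and publish it: a write barrier orders the WQE contents
 * before the producer index is advanced.
 */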
static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
                         unsigned int mask, u32 length)
{
        int err;
        struct rxe_sq *sq = &qp->sq;
        struct rxe_send_wqe *send_wqe;
        unsigned long flags;

        err = validate_send_wr(qp, ibwr, mask, length);
        if (err)
                return err;

        spin_lock_irqsave(&qp->sq.sq_lock, flags);

        if (unlikely(queue_full(sq->queue))) {
                err = -ENOMEM;
                goto err1;
        }

        send_wqe = producer_addr(sq->queue);

        err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
        if (unlikely(err))
                goto err1;

        /*
         * make sure all changes to the work queue are
         * written before we update the producer pointer
         */
        smp_wmb();

        advance_producer(sq->queue);
        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

        return 0;

err1:
        spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
        return err;
}
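/* Walk a chain of send WRs for a kernel QP, posting each one and
 * recording *bad_wr on the first failure, then kick the requester task;
 * see the comment in the body for when the task must be scheduled rather
 * than run inline.
 */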
static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
                                struct ib_send_wr **bad_wr)
{
        int err = 0;
        unsigned int mask;
        unsigned int length = 0;
        int i;
        int must_sched;

        while (wr) {
                mask = wr_opcode_mask(wr->opcode, qp);
                if (unlikely(!mask)) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
                             !(mask & WR_INLINE_MASK))) {
                        err = -EINVAL;
                        *bad_wr = wr;
                        break;
                }

                length = 0;
                for (i = 0; i < wr->num_sge; i++)
                        length += wr->sg_list[i].length;

                err = post_one_send(qp, wr, mask, length);
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        /*
         * Must sched in case of GSI QP because ib_send_mad() holds an irq
         * lock, and the requester calls ip_local_out_sk(), which takes
         * spin_lock_bh.
         */
        must_sched = (qp_type(qp) == IB_QPT_GSI) ||
                        (queue_count(qp->sq.queue) > 1);

        rxe_run_task(&qp->req.task, must_sched);
        if (unlikely(qp->req.state == QP_STATE_ERROR))
                rxe_run_task(&qp->comp.task, 1);

        return err;
}
static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         struct ib_send_wr **bad_wr)
{
        struct rxe_qp *qp = to_rqp(ibqp);

        if (unlikely(!qp->valid)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (unlikely(qp->req.state < QP_STATE_READY)) {
                *bad_wr = wr;
                return -EINVAL;
        }

        if (qp->is_user) {
                /* Utilize process context to do protocol processing */
                rxe_run_task(&qp->req.task, 0);
                return 0;
        } else
                return rxe_post_send_kernel(qp, wr, bad_wr);
}
static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                         struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_rq *rq = &qp->rq;
        unsigned long flags;

        if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        if (unlikely(qp->srq)) {
                *bad_wr = wr;
                err = -EINVAL;
                goto err1;
        }

        spin_lock_irqsave(&rq->producer_lock, flags);

        while (wr) {
                err = post_one_recv(rq, wr);
                if (unlikely(err)) {
                        *bad_wr = wr;
                        break;
                }
                wr = wr->next;
        }

        spin_unlock_irqrestore(&rq->producer_lock, flags);

        if (qp->resp.state == QP_STATE_ERROR)
                rxe_run_task(&qp->resp.task, 1);

err1:
        return err;
}
static struct ib_cq *rxe_create_cq(struct ib_device *dev,
                                   const struct ib_cq_init_attr *attr,
                                   struct ib_ucontext *context,
                                   struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(dev);
        struct rxe_cq *cq;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
        if (err)
                goto err1;

        cq = rxe_alloc(&rxe->cq_pool);
        if (!cq) {
                err = -ENOMEM;
                goto err1;
        }

        err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
                               context, udata);
        if (err)
                goto err2;

        return &cq->ibcq;

err2:
        rxe_drop_ref(cq);
err1:
        return ERR_PTR(err);
}
static int rxe_destroy_cq(struct ib_cq *ibcq)
{
        struct rxe_cq *cq = to_rcq(ibcq);

        rxe_drop_ref(cq);
        return 0;
}
static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        int err;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_dev *rxe = to_rdev(ibcq->device);

        err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
        if (err)
                goto err1;

        err = rxe_cq_resize_queue(cq, cqe, udata);
        if (err)
                goto err1;

        return 0;

err1:
        return err;
}
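/* Consume completed CQEs from the completion ring under cq_lock, copying
 * up to num_entries work completions into the caller's array and
 * returning the number actually polled.
 */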
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        int i;
        struct rxe_cq *cq = to_rcq(ibcq);
        struct rxe_cqe *cqe;
        unsigned long flags;

        spin_lock_irqsave(&cq->cq_lock, flags);
        for (i = 0; i < num_entries; i++) {
                cqe = queue_head(cq->queue);
                if (!cqe)
                        break;

                memcpy(wc++, &cqe->ibwc, sizeof(*wc));
                advance_consumer(cq->queue);
        }
        spin_unlock_irqrestore(&cq->cq_lock, flags);

        return i;
}
static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        int count = queue_count(cq->queue);

        return (count > wc_cnt) ? wc_cnt : count;
}
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct rxe_cq *cq = to_rcq(ibcq);
        unsigned long irq_flags;
        int ret = 0;

        spin_lock_irqsave(&cq->cq_lock, irq_flags);
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = flags & IB_CQ_SOLICITED_MASK;

        if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
                ret = 1;

        spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

        return ret;
}
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;
        int err;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(mr);
        rxe_add_ref(pd);

        err = rxe_mem_init_dma(rxe, pd, access, mr);
        if (err)
                goto err2;

        return &mr->ibmr;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err1:
        return ERR_PTR(err);
}
static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
                                     u64 start,
                                     u64 length,
                                     u64 iova,
                                     int access, struct ib_udata *udata)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(mr);
        rxe_add_ref(pd);

        err = rxe_mem_init_user(rxe, pd, start, length, iova,
                                access, udata, mr);
        if (err)
                goto err2;

        return &mr->ibmr;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err1:
        return ERR_PTR(err);
}
static int rxe_dereg_mr(struct ib_mr *ibmr)
{
        struct rxe_mem *mr = to_rmr(ibmr);

        mr->state = RXE_MEM_STATE_ZOMBIE;
        rxe_drop_ref(mr->pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
        return 0;
}
static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
                                  enum ib_mr_type mr_type,
                                  u32 max_num_sg)
{
        struct rxe_dev *rxe = to_rdev(ibpd->device);
        struct rxe_pd *pd = to_rpd(ibpd);
        struct rxe_mem *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = rxe_alloc(&rxe->mr_pool);
        if (!mr) {
                err = -ENOMEM;
                goto err1;
        }

        rxe_add_index(mr);
        rxe_add_ref(pd);

        err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
        if (err)
                goto err2;

        return &mr->ibmr;

err2:
        rxe_drop_ref(pd);
        rxe_drop_index(mr);
        rxe_drop_ref(mr);
err1:
        return ERR_PTR(err);
}
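/* rxe_set_page() is the per-page callback passed to ib_sg_to_pages() by
 * rxe_map_mr_sg(): it records each page address and size in the MR's map
 * table, failing once the pre-allocated buffer slots are exhausted.
 */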
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mem *mr = to_rmr(ibmr);
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(mr->nbuf == mr->num_buf))
                return -ENOMEM;

        map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        mr->nbuf++;

        return 0;
}
static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                         int sg_nents, unsigned int *sg_offset)
{
        struct rxe_mem *mr = to_rmr(ibmr);
        int n;

        mr->nbuf = 0;

        n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

        mr->va = ibmr->iova;
        mr->iova = ibmr->iova;
        mr->length = ibmr->length;
        mr->page_shift = ilog2(ibmr->page_size);
        mr->page_mask = ibmr->page_size - 1;
        mr->offset = mr->iova & mr->page_mask;

        return n;
}
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        int err;
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);
        struct rxe_mc_grp *grp;

        /* takes a ref on grp if successful */
        err = rxe_mcast_get_grp(rxe, mgid, &grp);
        if (err)
                return err;

        err = rxe_mcast_add_grp_elem(rxe, qp, grp);

        rxe_drop_ref(grp);
        return err;
}
static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
        struct rxe_dev *rxe = to_rdev(ibqp->device);
        struct rxe_qp *qp = to_rqp(ibqp);

        return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}
static ssize_t parent_show(struct device *device,
                           struct device_attribute *attr, char *buf)
{
        struct rxe_dev *rxe = container_of(device, struct rxe_dev,
                                           ib_dev.dev);

        return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct device_attribute *rxe_dev_attributes[] = {
        &dev_attr_parent,
};
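/* Register the rxe device with the RDMA core: fill in the ib_device
 * identity and DMA setup, advertise the supported uverbs commands, wire
 * up the verbs callbacks, allocate the crc32 shash used for ICRC
 * calculation, register the device, and create the sysfs attributes.
 */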
int rxe_register_device(struct rxe_dev *rxe)
{
        int err;
        int i;
        struct ib_device *dev = &rxe->ib_dev;
        struct crypto_shash *tfm;

        strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

        dev->owner = THIS_MODULE;
        dev->node_type = RDMA_NODE_IB_CA;
        dev->phys_port_cnt = 1;
        dev->num_comp_vectors = num_possible_cpus();
        dev->dev.parent = rxe_dma_device(rxe);
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);
        dev->dev.dma_ops = &dma_virt_ops;
        dma_coerce_mask_and_coherent(&dev->dev,
                                     dma_get_required_mask(dev->dev.parent));

        dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
            | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
            | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
            | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
            | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
            | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
            | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
            | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST);

        dev->query_device = rxe_query_device;
        dev->modify_device = rxe_modify_device;
        dev->query_port = rxe_query_port;
        dev->modify_port = rxe_modify_port;
        dev->get_link_layer = rxe_get_link_layer;
        dev->query_gid = rxe_query_gid;
        dev->get_netdev = rxe_get_netdev;
        dev->add_gid = rxe_add_gid;
        dev->del_gid = rxe_del_gid;
        dev->query_pkey = rxe_query_pkey;
        dev->alloc_ucontext = rxe_alloc_ucontext;
        dev->dealloc_ucontext = rxe_dealloc_ucontext;
        dev->mmap = rxe_mmap;
        dev->get_port_immutable = rxe_port_immutable;
        dev->alloc_pd = rxe_alloc_pd;
        dev->dealloc_pd = rxe_dealloc_pd;
        dev->create_ah = rxe_create_ah;
        dev->modify_ah = rxe_modify_ah;
        dev->query_ah = rxe_query_ah;
        dev->destroy_ah = rxe_destroy_ah;
        dev->create_srq = rxe_create_srq;
        dev->modify_srq = rxe_modify_srq;
        dev->query_srq = rxe_query_srq;
        dev->destroy_srq = rxe_destroy_srq;
        dev->post_srq_recv = rxe_post_srq_recv;
        dev->create_qp = rxe_create_qp;
        dev->modify_qp = rxe_modify_qp;
        dev->query_qp = rxe_query_qp;
        dev->destroy_qp = rxe_destroy_qp;
        dev->post_send = rxe_post_send;
        dev->post_recv = rxe_post_recv;
        dev->create_cq = rxe_create_cq;
        dev->destroy_cq = rxe_destroy_cq;
        dev->resize_cq = rxe_resize_cq;
        dev->poll_cq = rxe_poll_cq;
        dev->peek_cq = rxe_peek_cq;
        dev->req_notify_cq = rxe_req_notify_cq;
        dev->get_dma_mr = rxe_get_dma_mr;
        dev->reg_user_mr = rxe_reg_user_mr;
        dev->dereg_mr = rxe_dereg_mr;
        dev->alloc_mr = rxe_alloc_mr;
        dev->map_mr_sg = rxe_map_mr_sg;
        dev->attach_mcast = rxe_attach_mcast;
        dev->detach_mcast = rxe_detach_mcast;
        dev->get_hw_stats = rxe_ib_get_hw_stats;
        dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;

        tfm = crypto_alloc_shash("crc32", 0, 0);
        if (IS_ERR(tfm)) {
                pr_err("failed to allocate crc algorithm err:%ld\n",
                       PTR_ERR(tfm));
                return PTR_ERR(tfm);
        }
        rxe->tfm = tfm;

        err = ib_register_device(dev, NULL);
        if (err) {
                pr_warn("%s failed with error %d\n", __func__, err);
                goto err1;
        }

        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
                err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
                if (err) {
                        pr_warn("%s failed with error %d for attr number %d\n",
                                __func__, err, i);
                        goto err2;
                }
        }

        return 0;

err2:
        ib_unregister_device(dev);
err1:
        crypto_free_shash(rxe->tfm);

        return err;
}
int rxe_unregister_device(struct rxe_dev *rxe)
{
        int i;
        struct ib_device *dev = &rxe->ib_dev;

        for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
                device_remove_file(&dev->dev, rxe_dev_attributes[i]);

        ib_unregister_device(dev);

        return 0;
}