/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;

	return 0;
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	int rc;

	port = &rxe->port;

	/* *attr being zeroed by the caller, avoid zeroing it here */
	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
			      &attr->active_width);

	if (attr->state == IB_PORT_ACTIVE)
		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
	else if (dev_get_flags(rxe->ndev) & IFF_UP)
		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
	else
		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;

	mutex_unlock(&rxe->usdev_lock);

	return rc;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
		     IB_DEVICE_MODIFY_NODE_DESC))
		return -EOPNOTSUPP;

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}
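
/*
 * ucontext and PD objects carry no driver state beyond the pool element,
 * so allocation and teardown reduce to pool add/drop operations.
 */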
static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(uctx->device);
	struct rxe_ucontext *uc = to_ruc(uctx);

	return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
}

static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);

	return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
}

static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
}

static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr,
			 u32 flags, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem);
	if (err)
		return err;

	rxe_init_av(attr, &ah->av);
	return 0;
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	rxe_init_av(attr, &ah->av);
	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(&ah->av, attr);

	return 0;
}

static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah);
}
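
/*
 * post_one_recv() copies a single receive work request into the next free
 * slot of the receive queue ring. Callers hold the queue's producer_lock;
 * the memory barrier below orders the WQE writes against the producer
 * index update that makes the WQE visible to the responder.
 */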
static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length		= length;
	recv_wqe->dma.resid		= length;
	recv_wqe->dma.num_sge		= num_sge;
	recv_wqe->dma.cur_sge		= 0;
	recv_wqe->dma.sge_offset	= 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_pd *pd = to_rpd(ibsrq->pd);
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_create_srq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem);
	if (err)
		goto err1;

	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
	if (err)
		goto err2;

	return 0;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(srq);
err1:
	return err;
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);
	struct rxe_modify_srq_cmd ucmd = {};

	if (udata) {
		if (udata->inlen < sizeof(ucmd))
			return -EINVAL;

		err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
		if (err)
			return err;
	}

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->error)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;

	return 0;
}

static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_ref(srq);
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			     const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;
	struct rxe_create_qp_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return ERR_PTR(-EINVAL);
		uresp = udata->outbuf;
	}

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(qp);

	err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
	if (err)
		goto err2;

	return &qp->ibqp;

err2:
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}
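
/*
 * Send path helpers: validate_send_wr() bounds-checks a work request
 * against the send queue limits, init_send_wr()/init_send_wqe() translate
 * the ib_send_wr into the rxe_send_wqe layout consumed by the requester
 * task, and post_one_send() places the WQE on the send queue ring.
 */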
static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 const struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey	= rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr :
		mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0;
	wqe->mask		= mask;
	wqe->dma.length		= length;
	wqe->dma.resid		= length;
	wqe->dma.num_sge	= num_sge;
	wqe->dma.cur_sge	= 0;
	wqe->dma.sge_offset	= 0;
	wqe->state		= wqe_state_posted;
	wqe->ssn		= atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
				const struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);
		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	rxe_run_task(&qp->req.task, 1);
	if (unlikely(qp->req.state == QP_STATE_ERROR))
		rxe_run_task(&qp->comp.task, 1);

	return err;
}
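
/*
 * For user-space QPs the provider writes WQEs into the shared queue
 * directly, so posting only needs to kick the requester task; kernel QPs
 * go through rxe_post_send_kernel() to copy the work requests first.
 */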
static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
			 const struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
			 const struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

	if (qp->resp.state == QP_STATE_ERROR)
		rxe_run_task(&qp->resp.task, 1);

err1:
	return err;
}
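
/*
 * Completion queue verbs. The CQ is backed by a rxe_queue ring;
 * rxe_poll_cq() copies completed CQEs out under cq_lock and advances the
 * consumer index.
 */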
static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
			 struct ib_udata *udata)
{
	int err;
	struct ib_device *dev = ibcq->device;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_create_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	if (attr->flags)
		return -EINVAL;

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector);
	if (err)
		return err;

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata,
			       uresp);
	if (err)
		return err;

	return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem);
}

static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_cq_disable(cq);

	rxe_drop_ref(cq);
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);
	struct rxe_resize_cq_resp __user *uresp = NULL;

	if (udata) {
		if (udata->outlen < sizeof(*uresp))
			return -EINVAL;
		uresp = udata->outbuf;
	}

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}
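
/*
 * Memory region verbs: rxe_get_dma_mr() creates an MR for kernel DMA
 * addresses, rxe_reg_user_mr() registers a user buffer, and
 * rxe_alloc_mr()/rxe_map_mr_sg() implement fast-register MRs whose page
 * list is filled in one page at a time by rxe_set_page().
 */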
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);
	rxe_add_ref(pd);

	err = rxe_mem_init_dma(pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);
	rxe_add_ref(pd);

	err = rxe_mem_init_user(pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
				  u32 max_num_sg, struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);
	rxe_add_ref(pd);

	err = rxe_mem_init_fast(pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}
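
/*
 * Multicast attach/detach: look up (or create) the group by MGID and
 * link/unlink the QP so the receive path can deliver multicast packets
 * to every attached QP.
 */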
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t parent_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe =
		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR_RO(parent);

static struct attribute *rxe_dev_attributes[] = {
	&dev_attr_parent.attr,
	NULL
};

static const struct attribute_group rxe_attr_group = {
	.attrs	= rxe_dev_attributes,
};

static int rxe_enable_driver(struct ib_device *ib_dev)
{
	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);

	rxe_set_port_state(rxe);
	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));

	return 0;
}
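
/*
 * Single ib_device_ops table registered via ib_set_device_ops() in
 * rxe_register_device(). The INIT_RDMA_OBJ_SIZE() entries let the RDMA
 * core allocate the rxe containers (rxe_pd, rxe_cq, ...) together with
 * the ib_* objects passed to the callbacks above.
 */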
static const struct ib_device_ops rxe_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_RXE,
	.uverbs_abi_ver = RXE_UVERBS_ABI_VERSION,

	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
	.alloc_mr = rxe_alloc_mr,
	.alloc_pd = rxe_alloc_pd,
	.alloc_ucontext = rxe_alloc_ucontext,
	.attach_mcast = rxe_attach_mcast,
	.create_ah = rxe_create_ah,
	.create_cq = rxe_create_cq,
	.create_qp = rxe_create_qp,
	.create_srq = rxe_create_srq,
	.dealloc_driver = rxe_dealloc,
	.dealloc_pd = rxe_dealloc_pd,
	.dealloc_ucontext = rxe_dealloc_ucontext,
	.dereg_mr = rxe_dereg_mr,
	.destroy_ah = rxe_destroy_ah,
	.destroy_cq = rxe_destroy_cq,
	.destroy_qp = rxe_destroy_qp,
	.destroy_srq = rxe_destroy_srq,
	.detach_mcast = rxe_detach_mcast,
	.enable_driver = rxe_enable_driver,
	.get_dma_mr = rxe_get_dma_mr,
	.get_hw_stats = rxe_ib_get_hw_stats,
	.get_link_layer = rxe_get_link_layer,
	.get_port_immutable = rxe_port_immutable,
	.map_mr_sg = rxe_map_mr_sg,
	.mmap = rxe_mmap,
	.modify_ah = rxe_modify_ah,
	.modify_device = rxe_modify_device,
	.modify_port = rxe_modify_port,
	.modify_qp = rxe_modify_qp,
	.modify_srq = rxe_modify_srq,
	.peek_cq = rxe_peek_cq,
	.poll_cq = rxe_poll_cq,
	.post_recv = rxe_post_recv,
	.post_send = rxe_post_send,
	.post_srq_recv = rxe_post_srq_recv,
	.query_ah = rxe_query_ah,
	.query_device = rxe_query_device,
	.query_pkey = rxe_query_pkey,
	.query_port = rxe_query_port,
	.query_qp = rxe_query_qp,
	.query_srq = rxe_query_srq,
	.reg_user_mr = rxe_reg_user_mr,
	.req_notify_cq = rxe_req_notify_cq,
	.resize_cq = rxe_resize_cq,

	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
	INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq),
	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
};
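
/*
 * rxe_register_device() fills in the ib_device fields the core cannot
 * derive on its own (node GUID from the netdev MAC address, DMA setup,
 * uverbs command mask), allocates the crc32 shash used for ICRC
 * computation, and registers the device with the RDMA core.
 */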
int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
{
	int err;
	struct ib_device *dev = &rxe->ib_dev;
	struct crypto_shash *tfm;

	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;
	dev->dev.dma_parms = &rxe->dma_parms;
	rxe->dma_parms = (struct device_dma_parameters)
		{ .max_segment_size = SZ_2G };
	dma_coerce_mask_and_coherent(&dev->dev,
				     dma_get_required_mask(&dev->dev));

	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	ib_set_device_ops(dev, &rxe_dev_ops);
	err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1);
	if (err)
		return err;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(tfm));
		return PTR_ERR(tfm);
	}
	rxe->tfm = tfm;

	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
	err = ib_register_device(dev, ibdev_name);
	if (err)
		pr_warn("%s failed with error %d\n", __func__, err);

	/*
	 * Note that rxe may be invalid at this point if another thread
	 * unregistered it.
	 */
	return err;
}