2 * QEMU paravirtual RDMA - QP implementation
4 * Copyright (C) 2018 Oracle
5 * Copyright (C) 2018 Red Hat Inc
8 * Yuval Shaia <yuval.shaia@oracle.com>
9 * Marcel Apfelbaum <marcel@redhat.com>
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
16 #include "qemu/osdep.h"
18 #include "../rdma_utils.h"
19 #include "../rdma_rm.h"
20 #include "../rdma_backend.h"
25 #include "standard-headers/rdma/vmw_pvrdma-abi.h"
26 #include "pvrdma_qp_ops.h"
/*
 * Per-work-request completion context.
 *
 * One instance is allocated with g_malloc() for each WQE in the send/recv
 * paths of this file, filled in, and later cast back from the void *ctx
 * argument in pvrdma_qp_ops_comp_handler().
 *
 * cqe holds the fields known when the request is posted (wr_id, qp, opcode).
 *
 * NOTE(review): only the cqe member is visible in this view, but other code
 * in this file also accesses ->dev and ->cq_handle, so those member
 * declarations appear to be missing from this listing.
 */
28 typedef struct CompHandlerCtx
{
31 struct pvrdma_cqe cqe
;
/*
 * View of a send-queue work-queue element: a header followed by a
 * zero-length array of scatter/gather entries.
 *
 * pvrdma_qp_send() casts the element returned by
 * pvrdma_ring_next_elem_read() to this type and passes &sge[0] together
 * with hdr.num_sge as the entry count.
 */
35 typedef struct PvrdmaSqWqe
{
36 struct pvrdma_sq_wqe_hdr hdr
;
37 struct pvrdma_sge sge
[0];
/*
 * View of a receive-queue work-queue element: a header followed by a
 * zero-length array of scatter/gather entries.
 *
 * Used by pvrdma_qp_recv() and pvrdma_srq_recv(), which cast ring elements
 * to this type and pass &sge[0] to the backend.
 */
41 typedef struct PvrdmaRqWqe
{
42 struct pvrdma_rq_wqe_hdr hdr
;
43 struct pvrdma_sge sge
[0];
47 * 1. Put CQE on send CQ ring
48 * 2. Put CQ number on dsr completion ring
/*
 * Publish one completion for the CQ identified by cq_handle.
 *
 * dev:       device whose resources and dsr completion ring are used
 * cq_handle: handle looked up with rdma_rm_get_cq() and also written into
 *            the dsr completion ring entry
 * cqe:       caller-prepared fields; wr_id, qp and opcode are copied from it
 * wc:        backend work completion; status, byte_len, src_qp, wc_flags and
 *            vendor_err are copied from it
 *
 * NOTE(review): the function is declared to return int, but no return
 * statement and no body of the failure checks is visible in this view, so
 * the return values cannot be documented from here.
 */
51 static int pvrdma_post_cqe(PVRDMADev
*dev
, uint32_t cq_handle
,
52 struct pvrdma_cqe
*cqe
, struct ibv_wc
*wc
)
54 struct pvrdma_cqe
*cqe1
;
55 struct pvrdma_cqne
*cqne
;
57 RdmaRmCQ
*cq
= rdma_rm_get_cq(&dev
->rdma_dev_res
, cq_handle
);
/* The CQ's opaque pointer is used as its PvrdmaRing. */
63 ring
= (PvrdmaRing
*)cq
->opaque
;
65 /* Step #1: Put CQE on CQ ring */
66 cqe1
= pvrdma_ring_next_elem_write(ring
);
67 if (unlikely(!cqe1
)) {
/*
 * Zero the slot first, then fill it: request-side fields come from cqe,
 * completion-side fields come from wc.
 */
71 memset(cqe1
, 0, sizeof(*cqe1
));
72 cqe1
->wr_id
= cqe
->wr_id
;
/*
 * A zero cqe->qp means "take the QP number from the work completion";
 * pvrdma_srq_recv() stores 0 in cqe.qp.
 */
73 cqe1
->qp
= cqe
->qp
? cqe
->qp
: wc
->qp_num
;
74 cqe1
->opcode
= cqe
->opcode
;
75 cqe1
->status
= wc
->status
;
76 cqe1
->byte_len
= wc
->byte_len
;
77 cqe1
->src_qp
= wc
->src_qp
;
78 cqe1
->wc_flags
= wc
->wc_flags
;
79 cqe1
->vendor_err
= wc
->vendor_err
;
81 trace_pvrdma_post_cqe(cq_handle
, cq
->notify
, cqe1
->wr_id
, cqe1
->qp
,
82 cqe1
->opcode
, cqe1
->status
, cqe1
->byte_len
,
83 cqe1
->src_qp
, cqe1
->wc_flags
, cqe1
->vendor_err
);
/*
 * The slot is fully written at this point.
 * NOTE(review): presumably this call makes the entry visible to the
 * consumer - confirm against the ring implementation.
 */
85 pvrdma_ring_write_inc(ring
);
87 /* Step #2: Put CQ number on dsr completion ring */
88 cqne
= pvrdma_ring_next_elem_write(&dev
->dsr_info
.cq
);
89 if (unlikely(!cqne
)) {
93 cqne
->info
= cq_handle
;
94 pvrdma_ring_write_inc(&dev
->dsr_info
.cq
);
/*
 * Notification handling depends on cq->notify: the block below is entered
 * only when it is not CNT_CLEAR, and a CNT_ARM value is reset to CNT_CLEAR.
 * NOTE(review): the closing braces are not visible in this view, so it
 * cannot be told from here whether post_interrupt() is inside the CNT_ARM
 * branch or only inside the outer check.
 */
96 if (cq
->notify
!= CNT_CLEAR
) {
97 if (cq
->notify
== CNT_ARM
) {
98 cq
->notify
= CNT_CLEAR
;
100 post_interrupt(dev
, INTR_VEC_CMD_COMPLETION_Q
);
/*
 * Completion callback; registered with the backend in pvrdma_qp_ops_init()
 * and also called directly by complete_with_error().
 *
 * ctx is treated as a CompHandlerCtx; its dev, cq_handle and cqe are
 * forwarded to pvrdma_post_cqe() together with wc.
 *
 * NOTE(review): no release of ctx is visible in this view, although the
 * contexts are allocated with g_malloc() by the callers in this file -
 * confirm where the context is freed.
 */
106 static void pvrdma_qp_ops_comp_handler(void *ctx
, struct ibv_wc
*wc
)
108 CompHandlerCtx
*comp_ctx
= (CompHandlerCtx
*)ctx
;
110 pvrdma_post_cqe(comp_ctx
->dev
, comp_ctx
->cq_handle
, &comp_ctx
->cqe
, wc
);
/*
 * Report a failed work request without involving the backend.
 *
 * Builds an otherwise empty struct ibv_wc whose status is
 * IBV_WC_GENERAL_ERR and whose vendor_err is the given code, then feeds it
 * to pvrdma_qp_ops_comp_handler() with the caller's ctx.
 *
 * vendor_err: device-specific error code placed in the completion
 * ctx:        completion context (the callers in this file pass a
 *             CompHandlerCtx)
 */
115 static void complete_with_error(uint32_t vendor_err
, void *ctx
)
117 struct ibv_wc wc
= {};
119 wc
.status
= IBV_WC_GENERAL_ERR
;
120 wc
.vendor_err
= vendor_err
;
122 pvrdma_qp_ops_comp_handler(ctx
, &wc
);
/*
 * Tear-down counterpart of pvrdma_qp_ops_init(): calls
 * rdma_backend_unregister_comp_handler().
 */
125 void pvrdma_qp_ops_fini(void)
127 rdma_backend_unregister_comp_handler();
/*
 * Register pvrdma_qp_ops_comp_handler() as the backend completion handler.
 *
 * NOTE(review): the function is declared to return int, but the return
 * statement is not visible in this view.
 */
130 int pvrdma_qp_ops_init(void)
132 rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler
);
/*
 * Post send work requests of the QP identified by qp_handle to the backend.
 *
 * For a WQE read from the QP's send ring, a CompHandlerCtx is allocated and
 * pre-filled, the source GID and its backend index are looked up from the
 * WQE's gid_index, num_sge is checked against dev_attr.max_sge, and the
 * request is handed to rdma_backend_post_send() with the context as the
 * last argument.
 *
 * NOTE(review): local declarations, the check of the QP lookup, the loop
 * header, the conditions guarding the two GID error reports and what follows
 * each complete_with_error() call are not visible in this view.
 */
137 void pvrdma_qp_send(PVRDMADev
*dev
, uint32_t qp_handle
)
145 qp
= rdma_rm_get_qp(&dev
->rdma_dev_res
, qp_handle
);
/* The send path uses qp->opaque directly as its ring. */
150 ring
= (PvrdmaRing
*)qp
->opaque
;
152 wqe
= (struct PvrdmaSqWqe
*)pvrdma_ring_next_elem_read(ring
);
154 CompHandlerCtx
*comp_ctx
;
/*
 * Pre-fill the completion with what is already known at post time.
 * NOTE(review): the assignment of comp_ctx->dev, which the completion
 * handler reads, is not visible in this view.
 */
157 comp_ctx
= g_malloc(sizeof(CompHandlerCtx
));
159 comp_ctx
->cq_handle
= qp
->send_cq_handle
;
160 comp_ctx
->cqe
.wr_id
= wqe
->hdr
.wr_id
;
161 comp_ctx
->cqe
.qp
= qp_handle
;
162 comp_ctx
->cqe
.opcode
= IBV_WC_SEND
;
164 sgid
= rdma_rm_get_gid(&dev
->rdma_dev_res
, wqe
->hdr
.wr
.ud
.av
.gid_index
);
/* Failure path of the GID lookup: completes with VENDOR_ERR_INV_GID_IDX. */
166 rdma_error_report("Failed to get gid for idx %d",
167 wqe
->hdr
.wr
.ud
.av
.gid_index
);
168 complete_with_error(VENDOR_ERR_INV_GID_IDX
, comp_ctx
);
/* NOTE(review): one argument line of this call is missing from this view. */
172 sgid_idx
= rdma_rm_get_backend_gid_index(&dev
->rdma_dev_res
,
174 wqe
->hdr
.wr
.ud
.av
.gid_index
);
/* Failure path of the backend index lookup: same vendor error as above. */
176 rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
177 wqe
->hdr
.wr
.ud
.av
.gid_index
);
178 complete_with_error(VENDOR_ERR_INV_GID_IDX
, comp_ctx
);
/* Reject WQEs that carry more SGEs than the device attributes allow. */
182 if (wqe
->hdr
.num_sge
> dev
->dev_attr
.max_sge
) {
183 rdma_error_report("Invalid num_sge=%d (max %d)", wqe
->hdr
.num_sge
,
184 dev
->dev_attr
.max_sge
);
185 complete_with_error(VENDOR_ERR_INV_NUM_SGE
, comp_ctx
);
/*
 * Hand the request to the backend; the guest SGE array is passed cast to
 * struct ibv_sge *.
 * NOTE(review): one argument line of this call is missing from this view.
 */
189 rdma_backend_post_send(&dev
->backend_dev
, &qp
->backend_qp
, qp
->qp_type
,
190 (struct ibv_sge
*)&wqe
->sge
[0], wqe
->hdr
.num_sge
,
192 (union ibv_gid
*)wqe
->hdr
.wr
.ud
.av
.dgid
,
193 wqe
->hdr
.wr
.ud
.remote_qpn
,
194 wqe
->hdr
.wr
.ud
.remote_qkey
, comp_ctx
);
/*
 * Consume the element and fetch the next one.
 * NOTE(review): this re-read suggests a loop over pending WQEs; the loop
 * header is not visible in this view.
 */
196 pvrdma_ring_read_inc(ring
);
198 wqe
= pvrdma_ring_next_elem_read(ring
);
/*
 * Post receive work requests of the QP identified by qp_handle to the
 * backend.
 *
 * For a WQE read from the QP's receive ring, a CompHandlerCtx is allocated
 * and pre-filled with the QP's recv_cq_handle, the WQE's wr_id, qp_handle
 * and IBV_WC_RECV; num_sge is checked against dev_attr.max_sge and the
 * request is passed to rdma_backend_post_recv().
 *
 * NOTE(review): local declarations, the check of the QP lookup, the loop
 * header and the tail of the rdma_backend_post_recv() call are not visible
 * in this view.
 */
202 void pvrdma_qp_recv(PVRDMADev
*dev
, uint32_t qp_handle
)
208 qp
= rdma_rm_get_qp(&dev
->rdma_dev_res
, qp_handle
);
/*
 * qp->opaque is indexed as an array of PvrdmaRing: pvrdma_qp_send() uses
 * the first element, the receive path uses the second.
 */
213 ring
= &((PvrdmaRing
*)qp
->opaque
)[1];
215 wqe
= (struct PvrdmaRqWqe
*)pvrdma_ring_next_elem_read(ring
);
217 CompHandlerCtx
*comp_ctx
;
/*
 * Pre-fill the completion with what is already known at post time.
 * NOTE(review): the assignment of comp_ctx->dev is not visible in this view.
 */
220 comp_ctx
= g_malloc(sizeof(CompHandlerCtx
));
222 comp_ctx
->cq_handle
= qp
->recv_cq_handle
;
223 comp_ctx
->cqe
.wr_id
= wqe
->hdr
.wr_id
;
224 comp_ctx
->cqe
.qp
= qp_handle
;
225 comp_ctx
->cqe
.opcode
= IBV_WC_RECV
;
/* Reject WQEs that carry more SGEs than the device attributes allow. */
227 if (wqe
->hdr
.num_sge
> dev
->dev_attr
.max_sge
) {
228 rdma_error_report("Invalid num_sge=%d (max %d)", wqe
->hdr
.num_sge
,
229 dev
->dev_attr
.max_sge
);
230 complete_with_error(VENDOR_ERR_INV_NUM_SGE
, comp_ctx
);
/* NOTE(review): the remaining arguments of this call are missing here. */
234 rdma_backend_post_recv(&dev
->backend_dev
, &qp
->backend_qp
, qp
->qp_type
,
235 (struct ibv_sge
*)&wqe
->sge
[0], wqe
->hdr
.num_sge
,
/* Consume the element and fetch the next one. */
238 pvrdma_ring_read_inc(ring
);
240 wqe
= pvrdma_ring_next_elem_read(ring
);
/*
 * Post receive work requests of the shared receive queue identified by
 * srq_handle to the backend.
 *
 * Mirrors pvrdma_qp_recv(), with two visible differences: the ring is
 * srq->opaque itself, and cqe.qp is stored as 0 because the owning QP is
 * not known when the request is posted.
 *
 * NOTE(review): local declarations, the body of the failed-lookup branch,
 * the loop header and the tail of the rdma_backend_post_srq_recv() call are
 * not visible in this view.
 */
244 void pvrdma_srq_recv(PVRDMADev
*dev
, uint32_t srq_handle
)
250 srq
= rdma_rm_get_srq(&dev
->rdma_dev_res
, srq_handle
);
251 if (unlikely(!srq
)) {
255 ring
= (PvrdmaRing
*)srq
->opaque
;
257 wqe
= (struct PvrdmaRqWqe
*)pvrdma_ring_next_elem_read(ring
);
259 CompHandlerCtx
*comp_ctx
;
/*
 * Pre-fill the completion with what is already known at post time.
 * NOTE(review): the assignment of comp_ctx->dev is not visible in this view.
 */
262 comp_ctx
= g_malloc(sizeof(CompHandlerCtx
));
264 comp_ctx
->cq_handle
= srq
->recv_cq_handle
;
265 comp_ctx
->cqe
.wr_id
= wqe
->hdr
.wr_id
;
/* 0 makes pvrdma_post_cqe() fall back to wc->qp_num for the CQE's qp. */
266 comp_ctx
->cqe
.qp
= 0;
267 comp_ctx
->cqe
.opcode
= IBV_WC_RECV
;
/* Reject WQEs that carry more SGEs than the device attributes allow. */
269 if (wqe
->hdr
.num_sge
> dev
->dev_attr
.max_sge
) {
270 rdma_error_report("Invalid num_sge=%d (max %d)", wqe
->hdr
.num_sge
,
271 dev
->dev_attr
.max_sge
);
272 complete_with_error(VENDOR_ERR_INV_NUM_SGE
, comp_ctx
);
/* NOTE(review): the remaining arguments of this call are missing here. */
276 rdma_backend_post_srq_recv(&dev
->backend_dev
, &srq
->backend_srq
,
277 (struct ibv_sge
*)&wqe
->sge
[0],
/* Consume the element and fetch the next one. */
281 pvrdma_ring_read_inc(ring
);
283 wqe
= pvrdma_ring_next_elem_read(ring
);
/*
 * Poll the backend completion queue behind cq_handle.
 *
 * Looks the CQ up with rdma_rm_get_cq() and passes its backend_cq to
 * rdma_backend_poll_cq().
 *
 * NOTE(review): the declaration of cq and any check of the lookup result
 * are not visible in this view.
 */
288 void pvrdma_cq_poll(RdmaDeviceResources
*dev_res
, uint32_t cq_handle
)
292 cq
= rdma_rm_get_cq(dev_res
, cq_handle
);
297 rdma_backend_poll_cq(dev_res
, &cq
->backend_cq
);