/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
#include "user.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
        struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

        ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
        struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
        struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
        struct ib_cq *ibcq = &cq->ibcq;
        struct ib_event event;

        if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
                mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
                             type, mcq->cqn);
                return;
        }

        if (ibcq->event_handler) {
                event.device     = &dev->ib_dev;
                event.event      = IB_EVENT_CQ_ERR;
                event.element.cq = ibcq;
                ibcq->event_handler(&event, ibcq->cq_context);
        }
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
        return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
        return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

static u8 sw_ownership_bit(int n, int nent)
{
        return (n & nent) ? 1 : 0;
}

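/* A CQE belongs to software when its opcode is not MLX5_CQE_INVALID and
 * its ownership bit matches the parity expected for the current pass
 * over the power-of-two sized CQE ring.
 */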
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
        void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
        struct mlx5_cqe64 *cqe64;

        cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

        if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
            !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
                return cqe;
        } else {
                return NULL;
        }
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
        return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
        switch (wq->wr_data[idx]) {
        case IB_WR_LOCAL_INV:
                return IB_WC_LOCAL_INV;

        case IB_WR_FAST_REG_MR:
                return IB_WC_FAST_REG_MR;

        default:
                pr_warn("unknown completion status\n");
                return 0;
        }
}

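/* Translate a requester (send-side) completion into an ib_wc. The
 * *_IMM cases below deliberately fall through to the base opcode after
 * flagging IB_WC_WITH_IMM.
 */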
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                            struct mlx5_ib_wq *wq, int idx)
{
        wc->wc_flags = 0;
        switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
        case MLX5_OPCODE_RDMA_WRITE_IMM:
                wc->wc_flags |= IB_WC_WITH_IMM;
        case MLX5_OPCODE_RDMA_WRITE:
                wc->opcode    = IB_WC_RDMA_WRITE;
                break;
        case MLX5_OPCODE_SEND_IMM:
                wc->wc_flags |= IB_WC_WITH_IMM;
        case MLX5_OPCODE_SEND:
        case MLX5_OPCODE_SEND_INVAL:
                wc->opcode    = IB_WC_SEND;
                break;
        case MLX5_OPCODE_RDMA_READ:
                wc->opcode    = IB_WC_RDMA_READ;
                wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
                break;
        case MLX5_OPCODE_ATOMIC_CS:
                wc->opcode    = IB_WC_COMP_SWAP;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_ATOMIC_FA:
                wc->opcode    = IB_WC_FETCH_ADD;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_ATOMIC_MASKED_CS:
                wc->opcode    = IB_WC_MASKED_COMP_SWAP;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_ATOMIC_MASKED_FA:
                wc->opcode    = IB_WC_MASKED_FETCH_ADD;
                wc->byte_len  = 8;
                break;
        case MLX5_OPCODE_BIND_MW:
                wc->opcode    = IB_WC_BIND_MW;
                break;
        case MLX5_OPCODE_UMR:
                wc->opcode = get_umr_comp(wq, idx);
                break;
        }
}

enum {
        MLX5_GRH_IN_BUFFER = 1,
        MLX5_GRH_IN_CQE    = 2,
};

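/* Fill an ib_wc for a responder (receive-side) completion: recover the
 * WR ID from the SRQ or the receive queue ring, then decode the opcode,
 * immediate data and address vector fields from the CQE.
 */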
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
                             struct mlx5_ib_qp *qp)
{
        struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
        struct mlx5_ib_srq *srq;
        struct mlx5_ib_wq *wq;
        u16 wqe_ctr;
        u8 g;

        if (qp->ibqp.srq || qp->ibqp.xrcd) {
                struct mlx5_core_srq *msrq = NULL;

                if (qp->ibqp.xrcd) {
                        msrq = mlx5_core_get_srq(&dev->mdev,
                                                 be32_to_cpu(cqe->srqn));
                        srq = to_mibsrq(msrq);
                } else {
                        srq = to_msrq(qp->ibqp.srq);
                }
                if (srq) {
                        wqe_ctr = be16_to_cpu(cqe->wqe_counter);
                        wc->wr_id = srq->wrid[wqe_ctr];
                        mlx5_ib_free_srq_wqe(srq, wqe_ctr);
                        if (msrq && atomic_dec_and_test(&msrq->refcount))
                                complete(&msrq->free);
                }
        } else {
                wq        = &qp->rq;
                wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                ++wq->tail;
        }
        wc->byte_len = be32_to_cpu(cqe->byte_cnt);

        switch (cqe->op_own >> 4) {
        case MLX5_CQE_RESP_WR_IMM:
                wc->opcode      = IB_WC_RECV_RDMA_WITH_IMM;
                wc->wc_flags    = IB_WC_WITH_IMM;
                wc->ex.imm_data = cqe->imm_inval_pkey;
                break;
        case MLX5_CQE_RESP_SEND:
                wc->opcode   = IB_WC_RECV;
                wc->wc_flags = 0;
                break;
        case MLX5_CQE_RESP_SEND_IMM:
                wc->opcode      = IB_WC_RECV;
                wc->wc_flags    = IB_WC_WITH_IMM;
                wc->ex.imm_data = cqe->imm_inval_pkey;
                break;
        case MLX5_CQE_RESP_SEND_INV:
                wc->opcode      = IB_WC_RECV;
                wc->wc_flags    = IB_WC_WITH_INVALIDATE;
                wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
                break;
        }
        wc->slid           = be16_to_cpu(cqe->slid);
        wc->sl             = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
        wc->src_qp         = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
        wc->dlid_path_bits = cqe->ml_path;
        g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
        wc->wc_flags |= g ? IB_WC_GRH : 0;
        wc->pkey_index     = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
        __be32 *p = (__be32 *)cqe;
        int i;

        mlx5_ib_warn(dev, "dump error cqe\n");
        for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
                pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
                        be32_to_cpu(p[1]), be32_to_cpu(p[2]),
                        be32_to_cpu(p[3]));
}

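/* Map a hardware error syndrome onto an ib_wc status. Flush and retry
 * exceeded syndromes are expected during normal QP teardown, so they do
 * not trigger a CQE dump.
 */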
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
                                  struct mlx5_err_cqe *cqe,
                                  struct ib_wc *wc)
{
        int dump = 1;

        switch (cqe->syndrome) {
        case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
                wc->status = IB_WC_LOC_LEN_ERR;
                break;
        case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
                wc->status = IB_WC_LOC_QP_OP_ERR;
                break;
        case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
                wc->status = IB_WC_LOC_PROT_ERR;
                break;
        case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
                dump = 0;
                wc->status = IB_WC_WR_FLUSH_ERR;
                break;
        case MLX5_CQE_SYNDROME_MW_BIND_ERR:
                wc->status = IB_WC_MW_BIND_ERR;
                break;
        case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
                wc->status = IB_WC_BAD_RESP_ERR;
                break;
        case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
                wc->status = IB_WC_LOC_ACCESS_ERR;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
                wc->status = IB_WC_REM_INV_REQ_ERR;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
                wc->status = IB_WC_REM_ACCESS_ERR;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
                wc->status = IB_WC_REM_OP_ERR;
                break;
        case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
                wc->status = IB_WC_RETRY_EXC_ERR;
                dump = 0;
                break;
        case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
                wc->status = IB_WC_RNR_RETRY_EXC_ERR;
                dump = 0;
                break;
        case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
                wc->status = IB_WC_REM_ABORT_ERR;
                break;
        default:
                wc->status = IB_WC_GENERAL_ERR;
                break;
        }

        wc->vendor_err = cqe->vendor_err_synd;
        if (dump)
                dump_cqe(dev, cqe);
}

static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
        /* TBD: waiting decision
         */
        return 0;
}

static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
        struct mlx5_wqe_data_seg *dpseg;
        void *addr;

        dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
                sizeof(struct mlx5_wqe_raddr_seg) +
                sizeof(struct mlx5_wqe_atomic_seg);
        addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
        return addr;
}

static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
                          uint16_t idx)
{
        void *addr;
        int byte_count;
        int i;

        if (!is_atomic_response(qp, idx))
                return;

        byte_count = be32_to_cpu(cqe64->byte_cnt);
        addr = mlx5_get_atomic_laddr(qp, idx);

        if (byte_count == 4) {
                *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
        } else {
                for (i = 0; i < byte_count; i += 8) {
                        *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
                        addr += 8;
                }
        }
}

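/* Walk the send work-request list from the oldest outstanding entry
 * (last_poll) up to the entry reported by this CQE, fixing up the byte
 * order of any atomic responses along the way.
 */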
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
                           u16 tail, u16 head)
{
        int idx;

        do {
                idx = tail & (qp->sq.wqe_cnt - 1);
                handle_atomic(qp, cqe64, idx);
                if (idx == head)
                        break;

                tail = qp->sq.w_list[idx].next;
        } while (1);
        tail = qp->sq.w_list[idx].next;
        qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
        mlx5_buf_free(&dev->mdev, &buf->buf);
}

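/* Poll a single CQE: advance the consumer index, look up the QP the
 * completion belongs to (caching it in *cur_qp), and convert the CQE
 * into an ib_wc. Returns -EAGAIN when no CQE is owned by software.
 */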
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                         struct mlx5_ib_qp **cur_qp,
                         struct ib_wc *wc)
{
        struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
        struct mlx5_err_cqe *err_cqe;
        struct mlx5_cqe64 *cqe64;
        struct mlx5_core_qp *mqp;
        struct mlx5_ib_wq *wq;
        uint8_t opcode;
        uint32_t qpn;
        u16 wqe_ctr;
        void *cqe;
        int idx;

repoll:
        cqe = next_cqe_sw(cq);
        if (!cqe)
                return -EAGAIN;

        cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

        ++cq->mcq.cons_index;

        /* Make sure we read CQ entry contents after we've checked the
         * ownership bit.
         */
        rmb();

        opcode = cqe64->op_own >> 4;
        if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
                if (likely(cq->resize_buf)) {
                        free_cq_buf(dev, &cq->buf);
                        cq->buf = *cq->resize_buf;
                        kfree(cq->resize_buf);
                        cq->resize_buf = NULL;
                        goto repoll;
                } else {
                        mlx5_ib_warn(dev, "unexpected resize cqe\n");
                }
        }

        qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
        if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
                /* We do not have to take the QP table lock here,
                 * because CQs will be locked while QPs are removed
                 * from the table.
                 */
                mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
                if (unlikely(!mqp)) {
                        mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
                                     cq->mcq.cqn, qpn);
                        return -EINVAL;
                }

                *cur_qp = to_mibqp(mqp);
        }

        wc->qp = &(*cur_qp)->ibqp;
        switch (opcode) {
        case MLX5_CQE_REQ:
                wq = &(*cur_qp)->sq;
                wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
                idx = wqe_ctr & (wq->wqe_cnt - 1);
                handle_good_req(wc, cqe64, wq, idx);
                handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
                wc->wr_id = wq->wrid[idx];
                wq->tail = wq->wqe_head[idx] + 1;
                wc->status = IB_WC_SUCCESS;
                break;
        case MLX5_CQE_RESP_WR_IMM:
        case MLX5_CQE_RESP_SEND:
        case MLX5_CQE_RESP_SEND_IMM:
        case MLX5_CQE_RESP_SEND_INV:
                handle_responder(wc, cqe64, *cur_qp);
                wc->status = IB_WC_SUCCESS;
                break;
        case MLX5_CQE_RESIZE_CQ:
                break;
        case MLX5_CQE_REQ_ERR:
        case MLX5_CQE_RESP_ERR:
                err_cqe = (struct mlx5_err_cqe *)cqe64;
                mlx5_handle_error_cqe(dev, err_cqe, wc);
                mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
                            opcode == MLX5_CQE_REQ_ERR ?
                            "Requestor" : "Responder", cq->mcq.cqn);
                mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
                            err_cqe->syndrome, err_cqe->vendor_err_synd);
                if (opcode == MLX5_CQE_REQ_ERR) {
                        wq = &(*cur_qp)->sq;
                        wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
                        idx = wqe_ctr & (wq->wqe_cnt - 1);
                        wc->wr_id = wq->wrid[idx];
                        wq->tail = wq->wqe_head[idx] + 1;
                } else {
                        struct mlx5_ib_srq *srq;

                        if ((*cur_qp)->ibqp.srq) {
                                srq = to_msrq((*cur_qp)->ibqp.srq);
                                wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
                                wc->wr_id = srq->wrid[wqe_ctr];
                                mlx5_ib_free_srq_wqe(srq, wqe_ctr);
                        } else {
                                wq = &(*cur_qp)->rq;
                                wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
                                ++wq->tail;
                        }
                }
                break;
        }

        return 0;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
        struct mlx5_ib_cq *cq = to_mcq(ibcq);
        struct mlx5_ib_qp *cur_qp = NULL;
        unsigned long flags;
        int npolled;
        int err = 0;

        spin_lock_irqsave(&cq->lock, flags);

        for (npolled = 0; npolled < num_entries; npolled++) {
                err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
                if (err)
                        break;
        }

        if (npolled)
                mlx5_cq_set_ci(&cq->mcq);

        spin_unlock_irqrestore(&cq->lock, flags);

        if (err == 0 || err == -EAGAIN)
                return npolled;
        else
                return err;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        mlx5_cq_arm(&to_mcq(ibcq)->mcq,
                    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
                    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
                    to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
                    MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));

        return 0;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
                        int nent, int cqe_size)
{
        int err;

        err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
                             PAGE_SIZE * 2, &buf->buf);
        if (err)
                return err;

        buf->cqe_size = cqe_size;
        buf->nent = nent;

        return 0;
}

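/* User CQ creation: the CQE buffer and doorbell record live in user
 * memory described by the create request, so they are pinned with
 * ib_umem_get() and translated into a page list for the firmware.
 */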
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                          struct ib_ucontext *context, struct mlx5_ib_cq *cq,
                          int entries, struct mlx5_create_cq_mbox_in **cqb,
                          int *cqe_size, int *index, int *inlen)
{
        struct mlx5_ib_create_cq ucmd;
        int page_shift;
        int npages;
        int ncont;
        int err;

        if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
                return -EFAULT;

        if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
                return -EINVAL;

        *cqe_size = ucmd.cqe_size;

        cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
                                   entries * ucmd.cqe_size,
                                   IB_ACCESS_LOCAL_WRITE, 1);
        if (IS_ERR(cq->buf.umem)) {
                err = PTR_ERR(cq->buf.umem);
                return err;
        }

        err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
                                  &cq->db);
        if (err)
                goto err_umem;

        mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
                           &ncont, NULL);
        mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
                    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

        *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_db;
        }
        mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
        (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;

        *index = to_mucontext(context)->uuari.uars[0].index;

        return 0;

err_db:
        mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
        ib_umem_release(cq->buf.umem);
        return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
        mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
        ib_umem_release(cq->buf.umem);
}

static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
        int i;
        void *cqe;
        struct mlx5_cqe64 *cqe64;

        for (i = 0; i < buf->nent; i++) {
                cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
                cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
                cqe64->op_own = MLX5_CQE_INVALID << 4;
        }
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
                            int entries, int cqe_size,
                            struct mlx5_create_cq_mbox_in **cqb,
                            int *index, int *inlen)
{
        int err;

        err = mlx5_db_alloc(&dev->mdev, &cq->db);
        if (err)
                return err;

        cq->mcq.set_ci_db  = cq->db.db;
        cq->mcq.arm_db     = cq->db.db + 1;
        *cq->mcq.set_ci_db = 0;
        *cq->mcq.arm_db    = 0;
        cq->mcq.cqe_sz = cqe_size;

        err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
        if (err)
                goto err_db;

        init_cq_buf(cq, &cq->buf);

        *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
        *cqb = mlx5_vzalloc(*inlen);
        if (!*cqb) {
                err = -ENOMEM;
                goto err_buf;
        }
        mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);

        (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        *index = dev->mdev.priv.uuari.uars[0].index;

        return 0;

err_buf:
        free_cq_buf(dev, &cq->buf);

err_db:
        mlx5_db_free(&dev->mdev, &cq->db);
        return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
        free_cq_buf(dev, &cq->buf);
        mlx5_db_free(&dev->mdev, &cq->db);
}

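/* Verbs entry point for CQ creation. The CQE buffer is allocated either
 * from user memory (when a ucontext is supplied) or in the kernel, then
 * the CQ is created in firmware and attached to its completion EQ.
 */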
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
                                int vector, struct ib_ucontext *context,
                                struct ib_udata *udata)
{
        struct mlx5_create_cq_mbox_in *cqb = NULL;
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
        struct mlx5_ib_cq *cq;
        int uninitialized_var(index);
        int uninitialized_var(inlen);
        int cqe_size;
        int irqn;
        int eqn;
        int err;

        if (entries < 0)
                return ERR_PTR(-EINVAL);

        entries = roundup_pow_of_two(entries + 1);
        if (entries > dev->mdev.caps.max_cqes)
                return ERR_PTR(-EINVAL);

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        cq->ibcq.cqe = entries - 1;
        mutex_init(&cq->resize_mutex);
        spin_lock_init(&cq->lock);
        cq->resize_buf = NULL;
        cq->resize_umem = NULL;

        if (context) {
                err = create_cq_user(dev, udata, context, cq, entries,
                                     &cqb, &cqe_size, &index, &inlen);
                if (err)
                        goto err_create;
        } else {
                /* for now choose 64 bytes till we have a proper interface */
                cqe_size = 64;
                err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
                                       &index, &inlen);
                if (err)
                        goto err_create;
        }

        cq->cqe_size = cqe_size;
        cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
        cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
        err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
        if (err)
                goto err_cqb;

        cqb->ctx.c_eqn = cpu_to_be16(eqn);
        cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);

        err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
        if (err)
                goto err_cqb;

        mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
        cq->mcq.comp  = mlx5_ib_cq_comp;
        cq->mcq.event = mlx5_ib_cq_event;

        if (context)
                if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
                        err = -EFAULT;
                        goto err_cmd;
                }

        mlx5_vfree(cqb);
        return &cq->ibcq;

err_cmd:
        mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);

err_cqb:
        mlx5_vfree(cqb);
        if (context)
                destroy_cq_user(cq, context);
        else
                destroy_cq_kernel(dev, cq);

err_create:
        kfree(cq);

        return ERR_PTR(err);
}

int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
        struct mlx5_ib_dev *dev = to_mdev(cq->device);
        struct mlx5_ib_cq *mcq = to_mcq(cq);
        struct ib_ucontext *context = NULL;

        if (cq->uobject)
                context = cq->uobject->context;

        mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
        if (context)
                destroy_cq_user(mcq, context);
        else
                destroy_cq_kernel(dev, mcq);

        kfree(mcq);

        return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
        return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

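/* Remove all CQEs that belong to the given QP/SRQ number, compacting the
 * remaining entries. Callers are expected to hold the CQ lock (see
 * mlx5_ib_cq_clean() below).
 */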
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
        struct mlx5_cqe64 *cqe64, *dest64;
        void *cqe, *dest;
        u32 prod_index;
        int nfreed = 0;
        u8 owner_bit;

        if (!cq)
                return;

        /* First we need to find the current producer index, so we
         * know where to start cleaning from.  It doesn't matter if HW
         * adds new entries after this loop -- the QP we're worried
         * about is already in RESET, so the new entries won't come
         * from our QP and therefore don't need to be checked.
         */
        for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
                if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
                        break;

        /* Now sweep backwards through the CQ, removing CQ entries
         * that match our QP by copying older entries on top of them.
         */
        while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
                cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
                if (is_equal_rsn(cqe64, rsn)) {
                        if (srq && (ntohl(cqe64->srqn) & 0xffffff))
                                mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
                        ++nfreed;
                } else if (nfreed) {
                        dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
                        dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
                        owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
                        memcpy(dest, cqe, cq->mcq.cqe_sz);
                        dest64->op_own = owner_bit |
                                (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
                }
        }

        if (nfreed) {
                cq->mcq.cons_index += nfreed;
                /* Make sure update of buffer contents is done before
                 * updating consumer index.
                 */
                wmb();
                mlx5_cq_set_ci(&cq->mcq);
        }
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
        if (!cq)
                return;

        spin_lock_irq(&cq->lock);
        __mlx5_ib_cq_clean(cq, qpn, srq);
        spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
        struct mlx5_modify_cq_mbox_in *in;
        struct mlx5_ib_dev *dev = to_mdev(cq->device);
        struct mlx5_ib_cq *mcq = to_mcq(cq);
        int err;
        u32 fsel;

        if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
                return -ENOSYS;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        in->cqn = cpu_to_be32(mcq->mcq.cqn);
        fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
        in->ctx.cq_period = cpu_to_be16(cq_period);
        in->ctx.cq_max_count = cpu_to_be16(cq_count);
        in->field_select = cpu_to_be32(fsel);
        err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
        kfree(in);

        if (err)
                mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

        return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
                       int entries, struct ib_udata *udata, int *npas,
                       int *page_shift, int *cqe_size)
{
        struct mlx5_ib_resize_cq ucmd;
        struct ib_umem *umem;
        int err;
        int npages;
        struct ib_ucontext *context = cq->buf.umem->context;

        err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
        if (err)
                return err;

        if (ucmd.reserved0 || ucmd.reserved1)
                return -EINVAL;

        umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
                           IB_ACCESS_LOCAL_WRITE, 1);
        if (IS_ERR(umem)) {
                err = PTR_ERR(umem);
                return err;
        }

        mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
                           npas, NULL);

        cq->resize_umem = umem;
        *cqe_size = ucmd.cqe_size;

        return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
        ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
                         int entries, int cqe_size)
{
        int err;

        cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
        if (!cq->resize_buf)
                return -ENOMEM;

        err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
        if (err)
                goto ex;

        init_cq_buf(cq, cq->resize_buf);

        return 0;

ex:
        kfree(cq->resize_buf);
        return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
        free_cq_buf(dev, cq->resize_buf);
        cq->resize_buf = NULL;
}

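/* Kernel-side resize: copy each still-valid CQE from the old buffer into
 * the resize buffer, fixing up its ownership bit for its position in the
 * new ring, until the RESIZE_CQ CQE is reached.
 */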
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
        struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
        struct mlx5_cqe64 *scqe64;
        struct mlx5_cqe64 *dcqe64;
        void *start_cqe;
        void *scqe;
        void *dcqe;
        int ssize;
        int dsize;
        int i;
        u8 sw_own;

        ssize = cq->buf.cqe_size;
        dsize = cq->resize_buf->cqe_size;
        if (ssize != dsize) {
                mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
                return -EINVAL;
        }

        i = cq->mcq.cons_index;
        scqe = get_sw_cqe(cq, i);
        scqe64 = ssize == 64 ? scqe : scqe + 64;
        start_cqe = scqe;
        if (!scqe) {
                mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
                return -EINVAL;
        }

        while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
                dcqe = get_cqe_from_buf(cq->resize_buf,
                                        (i + 1) & (cq->resize_buf->nent),
                                        dsize);
                dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
                sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
                memcpy(dcqe, scqe, dsize);
                dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

                ++i;
                scqe = get_sw_cqe(cq, i);
                scqe64 = ssize == 64 ? scqe : scqe + 64;
                if (!scqe) {
                        mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
                        return -EINVAL;
                }

                if (scqe == start_cqe) {
                        pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
                                cq->mcq.cqn);
                        return -ENOMEM;
                }
        }
        ++cq->mcq.cons_index;
        return 0;
}

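/* Resize entry point. For user CQs the new buffer comes from user memory
 * and is swapped in once firmware acknowledges the resize; for kernel CQs
 * the remaining CQEs are copied over under the CQ lock (see
 * copy_resize_cqes() above).
 */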
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
        struct mlx5_ib_cq *cq = to_mcq(ibcq);
        struct mlx5_modify_cq_mbox_in *in;
        int err;
        int npas;
        int page_shift;
        int inlen;
        int uninitialized_var(cqe_size);
        unsigned long flags;

        if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
                pr_info("Firmware does not support resize CQ\n");
                return -ENOSYS;
        }

        if (entries < 1)
                return -EINVAL;

        entries = roundup_pow_of_two(entries + 1);
        if (entries > dev->mdev.caps.max_cqes + 1)
                return -EINVAL;

        if (entries == ibcq->cqe + 1)
                return 0;

        mutex_lock(&cq->resize_mutex);
        if (udata) {
                err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
                                  &cqe_size);
        } else {
                cqe_size = 64;
                err = resize_kernel(dev, cq, entries, cqe_size);
                if (!err) {
                        npas = cq->resize_buf->buf.npages;
                        page_shift = cq->resize_buf->buf.page_shift;
                }
        }

        if (err)
                goto ex;

        inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto ex_resize;
        }

        if (udata)
                mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
                                     in->pas, 0);
        else
                mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);

        in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
                                       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
                                       MLX5_MODIFY_CQ_MASK_PG_SIZE);
        in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
        in->ctx.page_offset = 0;
        in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
        in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
        in->cqn = cpu_to_be32(cq->mcq.cqn);

        err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
        if (err)
                goto ex_alloc;

        if (udata) {
                cq->ibcq.cqe = entries - 1;
                ib_umem_release(cq->buf.umem);
                cq->buf.umem = cq->resize_umem;
                cq->resize_umem = NULL;
        } else {
                struct mlx5_ib_cq_buf tbuf;
                int resized = 0;

                spin_lock_irqsave(&cq->lock, flags);
                if (cq->resize_buf) {
                        err = copy_resize_cqes(cq);
                        if (!err) {
                                tbuf = cq->buf;
                                cq->buf = *cq->resize_buf;
                                kfree(cq->resize_buf);
                                cq->resize_buf = NULL;
                                resized = 1;
                        }
                }
                cq->ibcq.cqe = entries - 1;
                spin_unlock_irqrestore(&cq->lock, flags);
                if (resized)
                        free_cq_buf(dev, &tbuf);
        }

        mutex_unlock(&cq->resize_mutex);

        mlx5_vfree(in);
        return 0;

ex_alloc:
        mlx5_vfree(in);

ex_resize:
        if (udata)
                un_resize_user(cq);
        else
                un_resize_kernel(dev, cq);
ex:
        mutex_unlock(&cq->resize_mutex);
        return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
        struct mlx5_ib_cq *cq;

        if (!ibcq)
                return 128;

        cq = to_mcq(ibcq);
        return cq->cqe_size;
}