/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "user.h"

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

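/*
 * The CQ buffer holds a power-of-two number of entries (nent), and indices
 * are allowed to run past nent so that (n & nent) acts as a "lap" bit that
 * flips every time the index wraps around the buffer.  This bit is what
 * gets written into the owner field of a CQE when software hands a slot
 * back (see the resize path below).
 */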
static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_FAST_REG_MR:
		return IB_WC_FAST_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

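/*
 * Translate the opcode carried in the high byte of sop_drop_qpn into an
 * ib_wc opcode for a send-side completion.  The missing breaks in the
 * *_IMM cases below are intentional: they only add IB_WC_WITH_IMM and
 * then fall through to the matching base opcode.
 */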
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_BIND_MW:
		wc->opcode    = IB_WC_BIND_MW;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE	   = 2,
};

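/*
 * Fill in an ib_wc for a receive completion.  The work request id comes
 * either from the SRQ wrid table (indexed by the CQE's wqe_counter) or
 * from the RQ ring; the rest of the fields are decoded from the CQE:
 * byte count, immediate data or invalidated rkey, slid/sl/src_qp, the
 * GRH flag, and the pkey index for QP1 traffic.
 */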
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq	  = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->slid	   = be16_to_cpu(cqe->slid);
	wc->sl		   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}

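/*
 * Map a hardware error syndrome to an ib_wc status.  Flush and
 * retry-exceeded errors are treated as expected and do not trigger a
 * CQE dump; everything else gets dumped to the log to aid debugging.
 */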
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision
	 */
	return 0;
}

static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
	return addr;
}

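/*
 * Atomic responses are written into the send WQE's data segment in
 * big-endian form; handle_atomic() converts them to host byte order in
 * place.  A 4-byte response is converted as a single 32-bit word,
 * anything larger is converted 8 bytes at a time.  Since
 * is_atomic_response() above is still a stub, this path is effectively
 * a no-op for now.
 */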
static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  u16 idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}

	return;
}

static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(dev->mdev, &buf->buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

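/*
 * Poll a single CQE.  Returns 0 if a completion was consumed, -EAGAIN if
 * the CQ is empty, or a negative error if the CQE references an unknown
 * QP or MR.  *cur_qp caches the QP of the previous CQE so that
 * back-to-back completions on the same QP skip the QP table lookup.
 */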
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mr *mmr;
	struct mlx5_ib_mr *mr;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
		if (unlikely(!mqp)) {
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
				     cq->mcq.cqn, qpn);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		read_lock(&dev->mdev->priv.mr_table.lock);
		mmr = __mlx5_mr_lookup(dev->mdev,
				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		if (unlikely(!mmr)) {
			read_unlock(&dev->mdev->priv.mr_table.lock);
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
			return -EINVAL;
		}

		mr = to_mibmr(mmr);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		read_unlock(&dev->mdev->priv.mr_table.lock);
		break;
	}

	return 0;
}

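/*
 * mlx5_ib_poll_cq() drains up to num_entries completions under the CQ
 * lock and only updates the consumer index doorbell (mlx5_cq_set_ci) if
 * at least one CQE was consumed.  -EAGAIN from mlx5_poll_one() simply
 * means the queue is empty and is not reported as an error.
 */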
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; npolled++) {
		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}

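/*
 * Arm the CQ for the next completion event.  IB_CQ_SOLICITED maps to the
 * "solicited only" doorbell request, anything else to an unconditional
 * notification request.
 */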
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;

	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page,
		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
		    to_mcq(ibcq)->mcq.cons_index);

	return 0;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
			  int entries, struct mlx5_create_cq_mbox_in **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd;
	size_t ucmdlen;
	int page_shift;
	int npages;
	int ncont;
	int err;

	ucmdlen =
		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
		 sizeof(ucmd)) ? (sizeof(ucmd) -
				  sizeof(ucmd.reserved)) : sizeof(ucmd);

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if (ucmdlen == sizeof(ucmd) &&
	    ucmd.reserved != 0)
		return -EINVAL;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;

	*index = to_mucontext(context)->uuari.uars[0].index;

	return 0;

err_db:
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
	ib_umem_release(cq->buf.umem);
}

static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    struct mlx5_create_cq_mbox_in **cqb,
			    int *index, int *inlen)
{
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}
	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);

	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	*index = dev->mdev->priv.uuari.uars[0].index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
				const struct ib_cq_init_attr *attr,
				struct ib_ucontext *context,
				struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_create_cq_mbox_in *cqb = NULL;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq;
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	int cqe_size;
	unsigned int irqn;
	int eqn;
	int err;

	if (entries < 0)
		return ERR_PTR(-EINVAL);

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
				     &cqb, &cqe_size, &index, &inlen);
		if (err)
			goto err_create;
	} else {
		/* for now choose 64 bytes till we have a proper interface */
		cqe_size = 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			goto err_create;
	}

	cq->cqe_size = cqe_size;
	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cqb->ctx.c_eqn = cpu_to_be16(eqn);
	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return &cq->ibcq;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (context)
		destroy_cq_user(cq, context);
	else
		destroy_cq_kernel(dev, cq);

err_create:
	kfree(cq);

	return ERR_PTR(err);
}

int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	struct ib_ucontext *context = NULL;

	if (cq->uobject)
		context = cq->uobject->context;

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (context)
		destroy_cq_user(mcq, context);
	else
		destroy_cq_kernel(dev, mcq);

	kfree(mcq);

	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

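/*
 * __mlx5_ib_cq_clean() removes CQEs that belong to a QP or SRQ being torn
 * down, identified by the resource serial number (rsn) in sop_drop_qpn.
 * Callers are expected to hold the CQ lock; mlx5_ib_cq_clean() below is
 * the locked wrapper.
 */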
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_modify_cq_mbox_in *in;
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;
	u32 fsel;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -ENOSYS;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	in->cqn = cpu_to_be32(mcq->mcq.cqn);
	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
	in->ctx.cq_period = cpu_to_be16(cq_period);
	in->ctx.cq_max_count = cpu_to_be16(cq_count);
	in->field_select = cpu_to_be32(fsel);
	err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
	kfree(in);

	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}

static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

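/*
 * CQ resize: allocate the new buffer (user memory via resize_user() or a
 * kernel buffer via resize_kernel()), issue the MLX5_CQ_OPMOD_RESIZE
 * modify command, and then switch the CQ over to the new buffer.  For
 * kernel CQs the switch happens under the CQ lock via copy_resize_cqes().
 */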
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_modify_cq_mbox_in *in;
	int err;
	int npas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1)
		return -EINVAL;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     in->pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);

	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	in->ctx.page_offset = 0;
	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
	in->cqn = cpu_to_be32(cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}