2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <linux/kref.h>
34 #include <rdma/ib_umem.h>
35 #include <rdma/ib_user_verbs.h>
36 #include <rdma/ib_cache.h>
40 static void mlx5_ib_cq_comp(struct mlx5_core_cq
*cq
)
42 struct ib_cq
*ibcq
= &to_mibcq(cq
)->ibcq
;
44 ibcq
->comp_handler(ibcq
, ibcq
->cq_context
);
/*
 * Asynchronous event callback for a core CQ.
 *
 * Warns when the event type is not MLX5_EVENT_TYPE_CQ_ERROR. When the ib_cq
 * has an event_handler, fills an ib_event (device, IB_EVENT_CQ_ERR, the CQ
 * itself) and passes it to the handler together with cq_context.
 *
 * NOTE(review): this listing appears to be missing lines (the braces, the
 * trailing arguments of the warning and whatever follows it) -- confirm
 * against the pristine file.
 */
47 static void mlx5_ib_cq_event(struct mlx5_core_cq
*mcq
, enum mlx5_event type
)
49 struct mlx5_ib_cq
*cq
= container_of(mcq
, struct mlx5_ib_cq
, mcq
);
50 struct mlx5_ib_dev
*dev
= to_mdev(cq
->ibcq
.device
);
51 struct ib_cq
*ibcq
= &cq
->ibcq
;
52 struct ib_event event
;
54 if (type
!= MLX5_EVENT_TYPE_CQ_ERROR
) {
55 mlx5_ib_warn(dev
, "Unexpected event type %d on CQ %06x\n",
60 if (ibcq
->event_handler
) {
61 event
.device
= &dev
->ib_dev
;
62 event
.event
= IB_EVENT_CQ_ERR
;
63 event
.element
.cq
= ibcq
;
64 ibcq
->event_handler(&event
, ibcq
->cq_context
);
68 static void *get_cqe_from_buf(struct mlx5_ib_cq_buf
*buf
, int n
, int size
)
70 return mlx5_buf_offset(&buf
->buf
, n
* size
);
73 static void *get_cqe(struct mlx5_ib_cq
*cq
, int n
)
75 return get_cqe_from_buf(&cq
->buf
, n
, cq
->mcq
.cqe_sz
);
78 static u8
sw_ownership_bit(int n
, int nent
)
80 return (n
& nent
) ? 1 : 0;
/*
 * Look up the CQE for index n (masked with cq->ibcq.cqe) and test whether
 * it is valid for software.
 *
 * cqe64 is the 64-byte CQE view: the slot itself when cqe_sz is 64,
 * otherwise slot + 64. The visible condition requires the opcode nibble
 * (op_own >> 4) to differ from MLX5_CQE_INVALID and the owner bit to equal
 * !!(n & (cq->ibcq.cqe + 1)).
 *
 * NOTE(review): the bodies of the if/else (the return statements) are not
 * visible in this listing -- confirm against the pristine file.
 */
83 static void *get_sw_cqe(struct mlx5_ib_cq
*cq
, int n
)
85 void *cqe
= get_cqe(cq
, n
& cq
->ibcq
.cqe
);
86 struct mlx5_cqe64
*cqe64
;
88 cqe64
= (cq
->mcq
.cqe_sz
== 64) ? cqe
: cqe
+ 64;
90 if (likely((cqe64
->op_own
) >> 4 != MLX5_CQE_INVALID
) &&
91 !((cqe64
->op_own
& MLX5_CQE_OWNER_MASK
) ^ !!(n
& (cq
->ibcq
.cqe
+ 1)))) {
98 static void *next_cqe_sw(struct mlx5_ib_cq
*cq
)
100 return get_sw_cqe(cq
, cq
->mcq
.cons_index
);
/*
 * Translate the work-request type stored in wq->wr_data[idx] into an
 * ib_wc_opcode. The visible case maps IB_WR_LOCAL_INV to IB_WC_LOCAL_INV;
 * a pr_warn() reports an unrecognised value.
 *
 * NOTE(review): other cases and the fallback return value are not visible
 * in this listing.
 */
103 static enum ib_wc_opcode
get_umr_comp(struct mlx5_ib_wq
*wq
, int idx
)
105 switch (wq
->wr_data
[idx
]) {
109 case IB_WR_LOCAL_INV
:
110 return IB_WC_LOCAL_INV
;
116 pr_warn("unknown completion status\n");
/*
 * Fill the opcode-dependent fields of @wc for a requester completion.
 *
 * The switch key is the top byte of cqe->sop_drop_qpn. Each case sets
 * wc->opcode; the *_IMM cases also OR IB_WC_WITH_IMM into wc->wc_flags,
 * RDMA read also sets wc->byte_len from cqe->byte_cnt, and UMR asks
 * get_umr_comp() for the opcode.
 *
 * NOTE(review): no break statements are visible in this listing. The *_IMM
 * cases sit directly above their plain counterparts, which suggests an
 * intentional fall-through there -- confirm against the pristine file.
 */
121 static void handle_good_req(struct ib_wc
*wc
, struct mlx5_cqe64
*cqe
,
122 struct mlx5_ib_wq
*wq
, int idx
)
125 switch (be32_to_cpu(cqe
->sop_drop_qpn
) >> 24) {
126 case MLX5_OPCODE_RDMA_WRITE_IMM
:
127 wc
->wc_flags
|= IB_WC_WITH_IMM
;
128 case MLX5_OPCODE_RDMA_WRITE
:
129 wc
->opcode
= IB_WC_RDMA_WRITE
;
131 case MLX5_OPCODE_SEND_IMM
:
132 wc
->wc_flags
|= IB_WC_WITH_IMM
;
133 case MLX5_OPCODE_SEND
:
134 case MLX5_OPCODE_SEND_INVAL
:
135 wc
->opcode
= IB_WC_SEND
;
137 case MLX5_OPCODE_RDMA_READ
:
138 wc
->opcode
= IB_WC_RDMA_READ
;
139 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
141 case MLX5_OPCODE_ATOMIC_CS
:
142 wc
->opcode
= IB_WC_COMP_SWAP
;
145 case MLX5_OPCODE_ATOMIC_FA
:
146 wc
->opcode
= IB_WC_FETCH_ADD
;
149 case MLX5_OPCODE_ATOMIC_MASKED_CS
:
150 wc
->opcode
= IB_WC_MASKED_COMP_SWAP
;
153 case MLX5_OPCODE_ATOMIC_MASKED_FA
:
154 wc
->opcode
= IB_WC_MASKED_FETCH_ADD
;
157 case MLX5_OPCODE_UMR
:
158 wc
->opcode
= get_umr_comp(wq
, idx
);
/*
 * NOTE(review): lone enumerator; the enclosing enum declaration is not
 * visible in this listing and the constant is not referenced in the
 * visible code.
 */
164 MLX5_GRH_IN_BUFFER
= 1,
/*
 * Fill @wc for a responder (receive-side) completion on @qp.
 *
 * Visible steps:
 *  - wr_id: when the QP has an SRQ or XRCD, taken from srq->wrid[] using the
 *    CQE's wqe_counter, after which the SRQ WQE is freed; an SRQ obtained via
 *    mlx5_core_get_srq() has its refcount dropped (completing msrq->free on
 *    the last reference). Otherwise taken from wq->wrid[] at the masked tail.
 *  - byte_len from cqe->byte_cnt.
 *  - opcode/wc_flags/ex data selected by the CQE opcode (op_own >> 4).
 *  - slid, sl, src_qp, dlid_path_bits and the GRH flag from the CQE.
 *  - for QP1, a cached pkey lookup using the low 16 bits of imm_inval_pkey.
 *  - after the link-layer test, network_hdr_type from (wc->sl & 0x3) and the
 *    IB_WC_WITH_NETWORK_HDR_TYPE flag.
 *
 * NOTE(review): several lines (declarations, else/break/return statements,
 * the statement guarded by the checksum test) are not visible in this
 * listing -- confirm against the pristine file.
 */
168 static void handle_responder(struct ib_wc
*wc
, struct mlx5_cqe64
*cqe
,
169 struct mlx5_ib_qp
*qp
)
171 enum rdma_link_layer ll
= rdma_port_get_link_layer(qp
->ibqp
.device
, 1);
172 struct mlx5_ib_dev
*dev
= to_mdev(qp
->ibqp
.device
);
173 struct mlx5_ib_srq
*srq
;
174 struct mlx5_ib_wq
*wq
;
178 if (qp
->ibqp
.srq
|| qp
->ibqp
.xrcd
) {
179 struct mlx5_core_srq
*msrq
= NULL
;
182 msrq
= mlx5_core_get_srq(dev
->mdev
,
183 be32_to_cpu(cqe
->srqn
));
184 srq
= to_mibsrq(msrq
);
186 srq
= to_msrq(qp
->ibqp
.srq
);
189 wqe_ctr
= be16_to_cpu(cqe
->wqe_counter
);
190 wc
->wr_id
= srq
->wrid
[wqe_ctr
];
191 mlx5_ib_free_srq_wqe(srq
, wqe_ctr
);
192 if (msrq
&& atomic_dec_and_test(&msrq
->refcount
))
193 complete(&msrq
->free
);
197 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
200 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
202 switch (cqe
->op_own
>> 4) {
203 case MLX5_CQE_RESP_WR_IMM
:
204 wc
->opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
205 wc
->wc_flags
= IB_WC_WITH_IMM
;
206 wc
->ex
.imm_data
= cqe
->imm_inval_pkey
;
208 case MLX5_CQE_RESP_SEND
:
209 wc
->opcode
= IB_WC_RECV
;
210 wc
->wc_flags
= IB_WC_IP_CSUM_OK
;
211 if (unlikely(!((cqe
->hds_ip_ext
& CQE_L3_OK
) &&
212 (cqe
->hds_ip_ext
& CQE_L4_OK
))))
215 case MLX5_CQE_RESP_SEND_IMM
:
216 wc
->opcode
= IB_WC_RECV
;
217 wc
->wc_flags
= IB_WC_WITH_IMM
;
218 wc
->ex
.imm_data
= cqe
->imm_inval_pkey
;
220 case MLX5_CQE_RESP_SEND_INV
:
221 wc
->opcode
= IB_WC_RECV
;
222 wc
->wc_flags
= IB_WC_WITH_INVALIDATE
;
223 wc
->ex
.invalidate_rkey
= be32_to_cpu(cqe
->imm_inval_pkey
);
226 wc
->slid
= be16_to_cpu(cqe
->slid
);
227 wc
->sl
= (be32_to_cpu(cqe
->flags_rqpn
) >> 24) & 0xf;
228 wc
->src_qp
= be32_to_cpu(cqe
->flags_rqpn
) & 0xffffff;
229 wc
->dlid_path_bits
= cqe
->ml_path
;
230 g
= (be32_to_cpu(cqe
->flags_rqpn
) >> 28) & 3;
231 wc
->wc_flags
|= g
? IB_WC_GRH
: 0;
232 if (unlikely(is_qp1(qp
->ibqp
.qp_type
))) {
233 u16 pkey
= be32_to_cpu(cqe
->imm_inval_pkey
) & 0xffff;
235 ib_find_cached_pkey(&dev
->ib_dev
, qp
->port
, pkey
,
241 if (ll
!= IB_LINK_LAYER_ETHERNET
)
244 switch (wc
->sl
& 0x3) {
245 case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH
:
246 wc
->network_hdr_type
= RDMA_NETWORK_IB
;
248 case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6
:
249 wc
->network_hdr_type
= RDMA_NETWORK_IPV6
;
251 case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4
:
252 wc
->network_hdr_type
= RDMA_NETWORK_IPV4
;
255 wc
->wc_flags
|= IB_WC_WITH_NETWORK_HDR_TYPE
;
/*
 * Log the raw contents of an error CQE: after a warning banner, print the
 * CQE 16 bytes per row as four 32-bit words converted from big-endian.
 *
 * NOTE(review): the declaration of i and the last pr_info() argument are
 * not visible in this listing.
 */
258 static void dump_cqe(struct mlx5_ib_dev
*dev
, struct mlx5_err_cqe
*cqe
)
260 __be32
*p
= (__be32
*)cqe
;
263 mlx5_ib_warn(dev
, "dump error cqe\n");
264 for (i
= 0; i
< sizeof(*cqe
) / 16; i
++, p
+= 4)
265 pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p
[0]),
266 be32_to_cpu(p
[1]), be32_to_cpu(p
[2]),
/*
 * Translate an error CQE into work-completion status: map cqe->syndrome to
 * the matching IB_WC_*_ERR value in wc->status and copy
 * cqe->vendor_err_synd into wc->vendor_err.
 *
 * NOTE(review): the rest of the parameter list, the break statements and
 * the label in front of the IB_WC_GENERAL_ERR assignment (presumably
 * "default:") are not visible in this listing -- confirm against the
 * pristine file.
 */
270 static void mlx5_handle_error_cqe(struct mlx5_ib_dev
*dev
,
271 struct mlx5_err_cqe
*cqe
,
276 switch (cqe
->syndrome
) {
277 case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR
:
278 wc
->status
= IB_WC_LOC_LEN_ERR
;
280 case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR
:
281 wc
->status
= IB_WC_LOC_QP_OP_ERR
;
283 case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR
:
284 wc
->status
= IB_WC_LOC_PROT_ERR
;
286 case MLX5_CQE_SYNDROME_WR_FLUSH_ERR
:
288 wc
->status
= IB_WC_WR_FLUSH_ERR
;
290 case MLX5_CQE_SYNDROME_MW_BIND_ERR
:
291 wc
->status
= IB_WC_MW_BIND_ERR
;
293 case MLX5_CQE_SYNDROME_BAD_RESP_ERR
:
294 wc
->status
= IB_WC_BAD_RESP_ERR
;
296 case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR
:
297 wc
->status
= IB_WC_LOC_ACCESS_ERR
;
299 case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR
:
300 wc
->status
= IB_WC_REM_INV_REQ_ERR
;
302 case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR
:
303 wc
->status
= IB_WC_REM_ACCESS_ERR
;
305 case MLX5_CQE_SYNDROME_REMOTE_OP_ERR
:
306 wc
->status
= IB_WC_REM_OP_ERR
;
308 case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR
:
309 wc
->status
= IB_WC_RETRY_EXC_ERR
;
312 case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR
:
313 wc
->status
= IB_WC_RNR_RETRY_EXC_ERR
;
316 case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR
:
317 wc
->status
= IB_WC_REM_ABORT_ERR
;
320 wc
->status
= IB_WC_GENERAL_ERR
;
324 wc
->vendor_err
= cqe
->vendor_err_synd
;
/*
 * Predicate consulted by handle_atomic() before it byte-swaps an atomic
 * result for send WQE @idx of @qp.
 *
 * NOTE(review): the body (and the remainder of the TBD comment) is not
 * visible in this listing, so the returned value cannot be stated here.
 */
329 static int is_atomic_response(struct mlx5_ib_qp
*qp
, uint16_t idx
)
331 /* TBD: waiting decision
/*
 * Compute a local address for the atomic send WQE @idx of @qp.
 *
 * The data segment is located by skipping the ctrl, raddr and atomic
 * segments from the start of the send WQE; its big-endian 64-bit addr field
 * is converted to CPU order and cast to a pointer.
 *
 * NOTE(review): the declaration of addr and the return statement are not
 * visible in this listing.
 */
336 static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp
*qp
, uint16_t idx
)
338 struct mlx5_wqe_data_seg
*dpseg
;
341 dpseg
= mlx5_get_send_wqe(qp
, idx
) + sizeof(struct mlx5_wqe_ctrl_seg
) +
342 sizeof(struct mlx5_wqe_raddr_seg
) +
343 sizeof(struct mlx5_wqe_atomic_seg
);
344 addr
= (void *)(unsigned long)be64_to_cpu(dpseg
->addr
);
/*
 * Convert, in place, the data at the address returned by
 * mlx5_get_atomic_laddr() from big-endian to CPU byte order.
 *
 * byte_count comes from cqe64->byte_cnt. A count of 4 converts a single
 * 32-bit value; otherwise a loop stepping 8 bytes at a time converts 64-bit
 * values. The conversion is gated by is_atomic_response().
 *
 * NOTE(review): the remaining parameters, local declarations, the statement
 * under the is_atomic_response() test, the else keyword and any advance of
 * addr inside the loop are not visible in this listing.
 */
348 static void handle_atomic(struct mlx5_ib_qp
*qp
, struct mlx5_cqe64
*cqe64
,
355 if (!is_atomic_response(qp
, idx
))
358 byte_count
= be32_to_cpu(cqe64
->byte_cnt
);
359 addr
= mlx5_get_atomic_laddr(qp
, idx
);
361 if (byte_count
== 4) {
362 *(uint32_t *)addr
= be32_to_cpu(*((__be32
*)addr
));
364 for (i
= 0; i
< byte_count
; i
+= 8) {
365 *(uint64_t *)addr
= be64_to_cpu(*((__be64
*)addr
));
/*
 * Apply handle_atomic() to send-queue entries starting from a tail index.
 *
 * Each index is tail masked with (sq.wqe_cnt - 1); the next tail is read
 * from qp->sq.w_list[idx].next, and the final tail is stored in
 * qp->sq.last_poll.
 *
 * NOTE(review): the remaining parameters, local declarations and the loop
 * construct with its termination test are not visible in this listing.
 */
373 static void handle_atomics(struct mlx5_ib_qp
*qp
, struct mlx5_cqe64
*cqe64
,
379 idx
= tail
& (qp
->sq
.wqe_cnt
- 1);
380 handle_atomic(qp
, cqe64
, idx
);
384 tail
= qp
->sq
.w_list
[idx
].next
;
386 tail
= qp
->sq
.w_list
[idx
].next
;
387 qp
->sq
.last_poll
= tail
;
390 static void free_cq_buf(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq_buf
*buf
)
392 mlx5_buf_free(dev
->mdev
, &buf
->buf
);
/*
 * Decode a signature-error CQE into @item.
 *
 * The 16-bit syndrome selects the error type:
 *  - GUARD_ERR  (bit 13): IB_SIG_BAD_GUARD, expected/actual taken from the
 *    upper 16 bits of the *_trans_sig fields;
 *  - REFTAG_ERR (bit 11): IB_SIG_BAD_REFTAG, expected/actual taken from the
 *    *_reftag fields;
 *  - APPTAG_ERR (bit 12): IB_SIG_BAD_APPTAG, expected/actual taken from the
 *    lower 16 bits of the *_trans_sig fields.
 * A pr_err() reports an unrecognised syndrome. sig_err_offset and key are
 * then filled from the CQE's err_offset and mkey.
 *
 * NOTE(review): the else keywords joining the tests and the pr_err()
 * argument are not visible in this listing.
 */
395 static void get_sig_err_item(struct mlx5_sig_err_cqe
*cqe
,
396 struct ib_sig_err
*item
)
398 u16 syndrome
= be16_to_cpu(cqe
->syndrome
);
400 #define GUARD_ERR (1 << 13)
401 #define APPTAG_ERR (1 << 12)
402 #define REFTAG_ERR (1 << 11)
404 if (syndrome
& GUARD_ERR
) {
405 item
->err_type
= IB_SIG_BAD_GUARD
;
406 item
->expected
= be32_to_cpu(cqe
->expected_trans_sig
) >> 16;
407 item
->actual
= be32_to_cpu(cqe
->actual_trans_sig
) >> 16;
409 if (syndrome
& REFTAG_ERR
) {
410 item
->err_type
= IB_SIG_BAD_REFTAG
;
411 item
->expected
= be32_to_cpu(cqe
->expected_reftag
);
412 item
->actual
= be32_to_cpu(cqe
->actual_reftag
);
414 if (syndrome
& APPTAG_ERR
) {
415 item
->err_type
= IB_SIG_BAD_APPTAG
;
416 item
->expected
= be32_to_cpu(cqe
->expected_trans_sig
) & 0xffff;
417 item
->actual
= be32_to_cpu(cqe
->actual_trans_sig
) & 0xffff;
419 pr_err("Got signature completion error with bad syndrome %04x\n",
423 item
->sig_err_offset
= be64_to_cpu(cqe
->err_offset
);
424 item
->key
= be32_to_cpu(cqe
->mkey
);
/*
 * Consume one CQE from @cq and translate it into a work completion.
 *
 * Visible flow:
 *  1. Fetch the CQE at the consumer index via next_cqe_sw(), pick the
 *     64-byte view and advance cq->mcq.cons_index.
 *  2. A MLX5_CQE_RESIZE_CQ opcode switches the CQ to cq->resize_buf: the old
 *     buffer is freed, the descriptor is copied into cq->buf and the
 *     resize_buf allocation is released. A warning is logged for an
 *     unexpected resize CQE.
 *  3. The QP number is the low 24 bits of sop_drop_qpn; when it differs
 *     from *cur_qp the QP is looked up with __mlx5_qp_lookup() and cached
 *     in *cur_qp. An unknown QPN is logged.
 *  4. Dispatch on the opcode:
 *       - requester completion: handle_good_req()/handle_atomics(), wr_id
 *         from wq->wrid[idx], tail advanced past wqe_head[idx], status
 *         IB_WC_SUCCESS;
 *       - responder completion: handle_responder(), status IB_WC_SUCCESS;
 *       - error CQE: mlx5_handle_error_cqe() plus debug output, wr_id
 *         recovered from the send queue, the SRQ (whose WQE is then freed)
 *         or the receive queue;
 *       - signature error: look up the MR under the mkey_table read lock,
 *         record the decoded error in mr->sig and log it.
 *
 * NOTE(review): many lines (remaining parameters, local declarations, the
 * switch header, the requester case labels, break/return statements and the
 * memory barrier under the ordering comment) are not visible in this
 * listing -- confirm against the pristine file before relying on details.
 */
427 static int mlx5_poll_one(struct mlx5_ib_cq
*cq
,
428 struct mlx5_ib_qp
**cur_qp
,
431 struct mlx5_ib_dev
*dev
= to_mdev(cq
->ibcq
.device
);
432 struct mlx5_err_cqe
*err_cqe
;
433 struct mlx5_cqe64
*cqe64
;
434 struct mlx5_core_qp
*mqp
;
435 struct mlx5_ib_wq
*wq
;
436 struct mlx5_sig_err_cqe
*sig_err_cqe
;
437 struct mlx5_core_mkey
*mmkey
;
438 struct mlx5_ib_mr
*mr
;
446 cqe
= next_cqe_sw(cq
);
450 cqe64
= (cq
->mcq
.cqe_sz
== 64) ? cqe
: cqe
+ 64;
452 ++cq
->mcq
.cons_index
;
454 /* Make sure we read CQ entry contents after we've checked the
459 opcode
= cqe64
->op_own
>> 4;
460 if (unlikely(opcode
== MLX5_CQE_RESIZE_CQ
)) {
461 if (likely(cq
->resize_buf
)) {
462 free_cq_buf(dev
, &cq
->buf
);
463 cq
->buf
= *cq
->resize_buf
;
464 kfree(cq
->resize_buf
);
465 cq
->resize_buf
= NULL
;
468 mlx5_ib_warn(dev
, "unexpected resize cqe\n");
472 qpn
= ntohl(cqe64
->sop_drop_qpn
) & 0xffffff;
473 if (!*cur_qp
|| (qpn
!= (*cur_qp
)->ibqp
.qp_num
)) {
474 /* We do not have to take the QP table lock here,
475 * because CQs will be locked while QPs are removed
478 mqp
= __mlx5_qp_lookup(dev
->mdev
, qpn
);
479 if (unlikely(!mqp
)) {
480 mlx5_ib_warn(dev
, "CQE@CQ %06x for unknown QPN %6x\n",
485 *cur_qp
= to_mibqp(mqp
);
488 wc
->qp
= &(*cur_qp
)->ibqp
;
492 wqe_ctr
= be16_to_cpu(cqe64
->wqe_counter
);
493 idx
= wqe_ctr
& (wq
->wqe_cnt
- 1);
494 handle_good_req(wc
, cqe64
, wq
, idx
);
495 handle_atomics(*cur_qp
, cqe64
, wq
->last_poll
, idx
);
496 wc
->wr_id
= wq
->wrid
[idx
];
497 wq
->tail
= wq
->wqe_head
[idx
] + 1;
498 wc
->status
= IB_WC_SUCCESS
;
500 case MLX5_CQE_RESP_WR_IMM
:
501 case MLX5_CQE_RESP_SEND
:
502 case MLX5_CQE_RESP_SEND_IMM
:
503 case MLX5_CQE_RESP_SEND_INV
:
504 handle_responder(wc
, cqe64
, *cur_qp
);
505 wc
->status
= IB_WC_SUCCESS
;
507 case MLX5_CQE_RESIZE_CQ
:
509 case MLX5_CQE_REQ_ERR
:
510 case MLX5_CQE_RESP_ERR
:
511 err_cqe
= (struct mlx5_err_cqe
*)cqe64
;
512 mlx5_handle_error_cqe(dev
, err_cqe
, wc
);
513 mlx5_ib_dbg(dev
, "%s error cqe on cqn 0x%x:\n",
514 opcode
== MLX5_CQE_REQ_ERR
?
515 "Requestor" : "Responder", cq
->mcq
.cqn
);
516 mlx5_ib_dbg(dev
, "syndrome 0x%x, vendor syndrome 0x%x\n",
517 err_cqe
->syndrome
, err_cqe
->vendor_err_synd
);
518 if (opcode
== MLX5_CQE_REQ_ERR
) {
520 wqe_ctr
= be16_to_cpu(cqe64
->wqe_counter
);
521 idx
= wqe_ctr
& (wq
->wqe_cnt
- 1);
522 wc
->wr_id
= wq
->wrid
[idx
];
523 wq
->tail
= wq
->wqe_head
[idx
] + 1;
525 struct mlx5_ib_srq
*srq
;
527 if ((*cur_qp
)->ibqp
.srq
) {
528 srq
= to_msrq((*cur_qp
)->ibqp
.srq
);
529 wqe_ctr
= be16_to_cpu(cqe64
->wqe_counter
);
530 wc
->wr_id
= srq
->wrid
[wqe_ctr
];
531 mlx5_ib_free_srq_wqe(srq
, wqe_ctr
);
534 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
539 case MLX5_CQE_SIG_ERR
:
540 sig_err_cqe
= (struct mlx5_sig_err_cqe
*)cqe64
;
542 read_lock(&dev
->mdev
->priv
.mkey_table
.lock
);
543 mmkey
= __mlx5_mr_lookup(dev
->mdev
,
544 mlx5_base_mkey(be32_to_cpu(sig_err_cqe
->mkey
)));
545 if (unlikely(!mmkey
)) {
546 read_unlock(&dev
->mdev
->priv
.mkey_table
.lock
);
547 mlx5_ib_warn(dev
, "CQE@CQ %06x for unknown MR %6x\n",
548 cq
->mcq
.cqn
, be32_to_cpu(sig_err_cqe
->mkey
));
552 mr
= to_mibmr(mmkey
);
553 get_sig_err_item(sig_err_cqe
, &mr
->sig
->err_item
);
554 mr
->sig
->sig_err_exists
= true;
555 mr
->sig
->sigerr_count
++;
557 mlx5_ib_warn(dev
, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
558 cq
->mcq
.cqn
, mr
->sig
->err_item
.key
,
559 mr
->sig
->err_item
.err_type
,
560 mr
->sig
->err_item
.sig_err_offset
,
561 mr
->sig
->err_item
.expected
,
562 mr
->sig
->err_item
.actual
);
564 read_unlock(&dev
->mdev
->priv
.mkey_table
.lock
);
/*
 * Move software-generated completions from cq->wc_list into the caller's
 * wc array.
 *
 * The list is walked with the _safe iterator because entries are unlinked
 * during the walk. Each entry is copied to wc[npolled++] and removed from
 * the list; the test against num_entries bounds how many are taken.
 *
 * NOTE(review): the remaining parameters, the npolled declaration, the
 * statement under the bound check, the freeing of the entry and the return
 * value are not visible in this listing.
 */
571 static int poll_soft_wc(struct mlx5_ib_cq
*cq
, int num_entries
,
574 struct mlx5_ib_dev
*dev
= to_mdev(cq
->ibcq
.device
);
575 struct mlx5_ib_wc
*soft_wc
, *next
;
578 list_for_each_entry_safe(soft_wc
, next
, &cq
->wc_list
, list
) {
579 if (npolled
>= num_entries
)
582 mlx5_ib_dbg(dev
, "polled software generated completion on CQ 0x%x\n",
585 wc
[npolled
++] = soft_wc
->wc
;
586 list_del(&soft_wc
->list
);
/*
 * Poll up to @num_entries work completions from @ibcq into @wc.
 *
 * With cq->lock held (IRQs saved): software-generated completions are
 * drained first when cq->wc_list is non-empty, then mlx5_poll_one() fills
 * the slots that remain, and the consumer index is published with
 * mlx5_cq_set_ci(). When err is 0 or -EAGAIN the return value is
 * soft_polled + npolled.
 *
 * NOTE(review): local declarations, the loop-exit test on err and the
 * error-path return are not visible in this listing.
 */
593 int mlx5_ib_poll_cq(struct ib_cq
*ibcq
, int num_entries
, struct ib_wc
*wc
)
595 struct mlx5_ib_cq
*cq
= to_mcq(ibcq
);
596 struct mlx5_ib_qp
*cur_qp
= NULL
;
602 spin_lock_irqsave(&cq
->lock
, flags
);
604 if (unlikely(!list_empty(&cq
->wc_list
)))
605 soft_polled
= poll_soft_wc(cq
, num_entries
, wc
);
607 for (npolled
= 0; npolled
< num_entries
- soft_polled
; npolled
++) {
608 err
= mlx5_poll_one(cq
, &cur_qp
, wc
+ soft_polled
+ npolled
);
614 mlx5_cq_set_ci(&cq
->mcq
);
616 spin_unlock_irqrestore(&cq
->lock
, flags
);
618 if (err
== 0 || err
== -EAGAIN
)
619 return soft_polled
+ npolled
;
/*
 * Request a completion notification on @ibcq.
 *
 * Under cq->lock: unless notify_flags is already IB_CQ_NEXT_COMP, it is set
 * to the solicited bits of @flags; a further test checks for
 * IB_CQ_REPORT_MISSED_EVENTS with a non-empty wc_list. After the lock is
 * dropped the CQ is armed with mlx5_cq_arm(), using MLX5_CQ_DB_REQ_NOT_SOL
 * when only solicited completions were requested and MLX5_CQ_DB_REQ_NOT
 * otherwise.
 *
 * NOTE(review): the statement under the missed-events test, one
 * mlx5_cq_arm() argument and the return statement are not visible in this
 * listing.
 */
624 int mlx5_ib_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags flags
)
626 struct mlx5_core_dev
*mdev
= to_mdev(ibcq
->device
)->mdev
;
627 struct mlx5_ib_cq
*cq
= to_mcq(ibcq
);
628 void __iomem
*uar_page
= mdev
->priv
.uuari
.uars
[0].map
;
629 unsigned long irq_flags
;
632 spin_lock_irqsave(&cq
->lock
, irq_flags
);
633 if (cq
->notify_flags
!= IB_CQ_NEXT_COMP
)
634 cq
->notify_flags
= flags
& IB_CQ_SOLICITED_MASK
;
636 if ((flags
& IB_CQ_REPORT_MISSED_EVENTS
) && !list_empty(&cq
->wc_list
))
638 spin_unlock_irqrestore(&cq
->lock
, irq_flags
);
640 mlx5_cq_arm(&cq
->mcq
,
641 (flags
& IB_CQ_SOLICITED_MASK
) == IB_CQ_SOLICITED
?
642 MLX5_CQ_DB_REQ_NOT_SOL
: MLX5_CQ_DB_REQ_NOT
,
644 MLX5_GET_DOORBELL_LOCK(&mdev
->priv
.cq_uar_lock
),
645 to_mcq(ibcq
)->mcq
.cons_index
);
/*
 * Allocate storage for @nent CQEs of @cqe_size bytes each into @buf via
 * mlx5_buf_alloc() and record cqe_size in the descriptor.
 *
 * NOTE(review): the err declaration, the error check, any other field
 * assignments and the return statement are not visible in this listing.
 */
650 static int alloc_cq_buf(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq_buf
*buf
,
651 int nent
, int cqe_size
)
655 err
= mlx5_buf_alloc(dev
->mdev
, nent
* cqe_size
, &buf
->buf
);
659 buf
->cqe_size
= cqe_size
;
/*
 * Set up the user-space side of a new CQ.
 *
 * Visible steps: choose how many bytes of the user command to copy (the
 * full struct, or the struct without its reserved field when the caller
 * supplied less), copy it in, accept only CQE sizes of 64 or 128, pin the
 * user buffer with ib_umem_get(), map the user doorbell, compute the page
 * layout, allocate the create-CQ mailbox (*cqb, *inlen) and fill in its
 * page list and log_pg_sz, and report the UAR index through *index.
 * The last two calls visible here undo the doorbell mapping and the umem
 * pin.
 *
 * NOTE(review): local declarations, the individual error checks/returns
 * and the goto labels are not visible in this listing -- confirm against
 * the pristine file.
 */
665 static int create_cq_user(struct mlx5_ib_dev
*dev
, struct ib_udata
*udata
,
666 struct ib_ucontext
*context
, struct mlx5_ib_cq
*cq
,
667 int entries
, struct mlx5_create_cq_mbox_in
**cqb
,
668 int *cqe_size
, int *index
, int *inlen
)
670 struct mlx5_ib_create_cq ucmd
;
678 (udata
->inlen
- sizeof(struct ib_uverbs_cmd_hdr
) <
679 sizeof(ucmd
)) ? (sizeof(ucmd
) -
680 sizeof(ucmd
.reserved
)) : sizeof(ucmd
);
682 if (ib_copy_from_udata(&ucmd
, udata
, ucmdlen
))
685 if (ucmdlen
== sizeof(ucmd
) &&
689 if (ucmd
.cqe_size
!= 64 && ucmd
.cqe_size
!= 128)
692 *cqe_size
= ucmd
.cqe_size
;
694 cq
->buf
.umem
= ib_umem_get(context
, ucmd
.buf_addr
,
695 entries
* ucmd
.cqe_size
,
696 IB_ACCESS_LOCAL_WRITE
, 1);
697 if (IS_ERR(cq
->buf
.umem
)) {
698 err
= PTR_ERR(cq
->buf
.umem
);
702 err
= mlx5_ib_db_map_user(to_mucontext(context
), ucmd
.db_addr
,
707 mlx5_ib_cont_pages(cq
->buf
.umem
, ucmd
.buf_addr
, &npages
, &page_shift
,
709 mlx5_ib_dbg(dev
, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
710 ucmd
.buf_addr
, entries
* ucmd
.cqe_size
, npages
, page_shift
, ncont
);
712 *inlen
= sizeof(**cqb
) + sizeof(*(*cqb
)->pas
) * ncont
;
713 *cqb
= mlx5_vzalloc(*inlen
);
718 mlx5_ib_populate_pas(dev
, cq
->buf
.umem
, page_shift
, (*cqb
)->pas
, 0);
719 (*cqb
)->ctx
.log_pg_sz
= page_shift
- MLX5_ADAPTER_PAGE_SHIFT
;
721 *index
= to_mucontext(context
)->uuari
.uars
[0].index
;
726 mlx5_ib_db_unmap_user(to_mucontext(context
), &cq
->db
);
729 ib_umem_release(cq
->buf
.umem
);
733 static void destroy_cq_user(struct mlx5_ib_cq
*cq
, struct ib_ucontext
*context
)
735 mlx5_ib_db_unmap_user(to_mucontext(context
), &cq
->db
);
736 ib_umem_release(cq
->buf
.umem
);
/*
 * Mark every entry of @buf invalid: for each of the buf->nent entries,
 * locate the 64-byte CQE view (the entry itself for 64-byte CQEs, otherwise
 * entry + 64) and set its op_own to MLX5_CQE_INVALID << 4.
 *
 * NOTE(review): the declarations of i and cqe are not visible in this
 * listing.
 */
739 static void init_cq_buf(struct mlx5_ib_cq
*cq
, struct mlx5_ib_cq_buf
*buf
)
743 struct mlx5_cqe64
*cqe64
;
745 for (i
= 0; i
< buf
->nent
; i
++) {
746 cqe
= get_cqe_from_buf(buf
, i
, buf
->cqe_size
);
747 cqe64
= buf
->cqe_size
== 64 ? cqe
: cqe
+ 64;
748 cqe64
->op_own
= MLX5_CQE_INVALID
<< 4;
/*
 * Set up the kernel-owned side of a new CQ.
 *
 * Visible steps: allocate a doorbell record and point set_ci_db/arm_db at
 * its first and second words, record the CQE size, allocate and initialise
 * the CQE buffer, allocate the create-CQ mailbox sized for the buffer's
 * page list (*cqb, *inlen), fill the page array and log_pg_sz, and report
 * the UAR index through *index. The last two calls visible here free the
 * CQE buffer and the doorbell record.
 *
 * NOTE(review): the err declaration, the error checks, the goto labels and
 * the return statements are not visible in this listing.
 */
752 static int create_cq_kernel(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq
*cq
,
753 int entries
, int cqe_size
,
754 struct mlx5_create_cq_mbox_in
**cqb
,
755 int *index
, int *inlen
)
759 err
= mlx5_db_alloc(dev
->mdev
, &cq
->db
);
763 cq
->mcq
.set_ci_db
= cq
->db
.db
;
764 cq
->mcq
.arm_db
= cq
->db
.db
+ 1;
765 cq
->mcq
.cqe_sz
= cqe_size
;
767 err
= alloc_cq_buf(dev
, &cq
->buf
, entries
, cqe_size
);
771 init_cq_buf(cq
, &cq
->buf
);
773 *inlen
= sizeof(**cqb
) + sizeof(*(*cqb
)->pas
) * cq
->buf
.buf
.npages
;
774 *cqb
= mlx5_vzalloc(*inlen
);
779 mlx5_fill_page_array(&cq
->buf
.buf
, (*cqb
)->pas
);
781 (*cqb
)->ctx
.log_pg_sz
= cq
->buf
.buf
.page_shift
- MLX5_ADAPTER_PAGE_SHIFT
;
782 *index
= dev
->mdev
->priv
.uuari
.uars
[0].index
;
787 free_cq_buf(dev
, &cq
->buf
);
790 mlx5_db_free(dev
->mdev
, &cq
->db
);
794 static void destroy_cq_kernel(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq
*cq
)
796 free_cq_buf(dev
, &cq
->buf
);
797 mlx5_db_free(dev
->mdev
, &cq
->db
);
/*
 * Work item handler: recover the mlx5_ib_cq that embeds @work and invoke
 * the CQ's comp_handler with its cq_context.
 *
 * NOTE(review): the final container_of() argument (the member name) is not
 * visible in this listing; mlx5_ib_create_cq() registers this handler on
 * cq->notify_work.
 */
800 static void notify_soft_wc_handler(struct work_struct
*work
)
802 struct mlx5_ib_cq
*cq
= container_of(work
, struct mlx5_ib_cq
,
805 cq
->ibcq
.comp_handler(&cq
->ibcq
, cq
->ibcq
.cq_context
);
/*
 * Create a completion queue on @ibdev.
 *
 * Visible steps: validate the creation flags, round the requested size up
 * to a power of two of at least attr->cqe + 1 and reject it when it exceeds
 * the device's log_max_cq_sz, allocate and initialise the mlx5_ib_cq, build
 * the CQ buffers through create_cq_user() or create_cq_kernel() (the latter
 * also initialises notify_work), fill the CQ context (CQE size, optional
 * ignore-overrun bit, log size and UAR index, EQ number, doorbell address),
 * create the CQ in firmware, install the completion/event callbacks,
 * initialise wc_list and copy the CQN back to user space. The trailing
 * calls are the unwind steps.
 *
 * NOTE(review): the conditions selecting the user/kernel paths, several
 * declarations, the error checks, goto labels, the mailbox free and the
 * return statements are not visible in this listing -- confirm against the
 * pristine file.
 */
808 struct ib_cq
*mlx5_ib_create_cq(struct ib_device
*ibdev
,
809 const struct ib_cq_init_attr
*attr
,
810 struct ib_ucontext
*context
,
811 struct ib_udata
*udata
)
813 int entries
= attr
->cqe
;
814 int vector
= attr
->comp_vector
;
815 struct mlx5_create_cq_mbox_in
*cqb
= NULL
;
816 struct mlx5_ib_dev
*dev
= to_mdev(ibdev
);
817 struct mlx5_ib_cq
*cq
;
818 int uninitialized_var(index
);
819 int uninitialized_var(inlen
);
826 return ERR_PTR(-EINVAL
);
828 if (check_cq_create_flags(attr
->flags
))
829 return ERR_PTR(-EOPNOTSUPP
);
831 entries
= roundup_pow_of_two(entries
+ 1);
832 if (entries
> (1 << MLX5_CAP_GEN(dev
->mdev
, log_max_cq_sz
)))
833 return ERR_PTR(-EINVAL
);
835 cq
= kzalloc(sizeof(*cq
), GFP_KERNEL
);
837 return ERR_PTR(-ENOMEM
);
839 cq
->ibcq
.cqe
= entries
- 1;
840 mutex_init(&cq
->resize_mutex
);
841 spin_lock_init(&cq
->lock
);
842 cq
->resize_buf
= NULL
;
843 cq
->resize_umem
= NULL
;
844 cq
->create_flags
= attr
->flags
;
847 err
= create_cq_user(dev
, udata
, context
, cq
, entries
,
848 &cqb
, &cqe_size
, &index
, &inlen
);
852 /* for now choose 64 bytes till we have a proper interface */
854 err
= create_cq_kernel(dev
, cq
, entries
, cqe_size
, &cqb
,
859 INIT_WORK(&cq
->notify_work
, notify_soft_wc_handler
);
862 cq
->cqe_size
= cqe_size
;
863 cqb
->ctx
.cqe_sz_flags
= cqe_sz_to_mlx_sz(cqe_size
) << 5;
865 if (cq
->create_flags
& IB_CQ_FLAGS_IGNORE_OVERRUN
)
866 cqb
->ctx
.cqe_sz_flags
|= (1 << 1);
868 cqb
->ctx
.log_sz_usr_page
= cpu_to_be32((ilog2(entries
) << 24) | index
);
869 err
= mlx5_vector2eqn(dev
->mdev
, vector
, &eqn
, &irqn
);
873 cqb
->ctx
.c_eqn
= cpu_to_be16(eqn
);
874 cqb
->ctx
.db_record_addr
= cpu_to_be64(cq
->db
.dma
);
876 err
= mlx5_core_create_cq(dev
->mdev
, &cq
->mcq
, cqb
, inlen
);
880 mlx5_ib_dbg(dev
, "cqn 0x%x\n", cq
->mcq
.cqn
);
882 cq
->mcq
.comp
= mlx5_ib_cq_comp
;
883 cq
->mcq
.event
= mlx5_ib_cq_event
;
885 INIT_LIST_HEAD(&cq
->wc_list
);
888 if (ib_copy_to_udata(udata
, &cq
->mcq
.cqn
, sizeof(__u32
))) {
898 mlx5_core_destroy_cq(dev
->mdev
, &cq
->mcq
);
903 destroy_cq_user(cq
, context
);
905 destroy_cq_kernel(dev
, cq
);
/*
 * Destroy a CQ: destroy it in firmware with mlx5_core_destroy_cq(), then
 * release its buffers through destroy_cq_user() (using the uobject's
 * context) or destroy_cq_kernel().
 *
 * NOTE(review): the conditions choosing between the user and kernel paths,
 * the freeing of the mlx5_ib_cq itself and the return statement are not
 * visible in this listing.
 */
914 int mlx5_ib_destroy_cq(struct ib_cq
*cq
)
916 struct mlx5_ib_dev
*dev
= to_mdev(cq
->device
);
917 struct mlx5_ib_cq
*mcq
= to_mcq(cq
);
918 struct ib_ucontext
*context
= NULL
;
921 context
= cq
->uobject
->context
;
923 mlx5_core_destroy_cq(dev
->mdev
, &mcq
->mcq
);
925 destroy_cq_user(mcq
, context
);
927 destroy_cq_kernel(dev
, mcq
);
934 static int is_equal_rsn(struct mlx5_cqe64
*cqe64
, u32 rsn
)
936 return rsn
== (ntohl(cqe64
->sop_drop_qpn
) & 0xffffff);
/*
 * Remove from @cq every CQE whose resource number equals @rsn.
 *
 * First the producer index is found by scanning forward from the consumer
 * index while get_sw_cqe() reports entries. The CQ is then swept backwards:
 * matching entries are dropped (their SRQ WQE is freed when @srq is given
 * and the CQE carries an SRQ number), and non-matching entries are copied
 * forward by nfreed slots while the destination slot keeps its own owner
 * bit. Finally the consumer index is advanced by nfreed and published with
 * mlx5_cq_set_ci().
 *
 * mlx5_ib_cq_clean() calls this with cq->lock held.
 *
 * NOTE(review): local declarations, the break out of the first loop, the
 * nfreed increment, the else branch structure, the test guarding the
 * consumer-index update and the write barrier are not visible in this
 * listing.
 */
939 void __mlx5_ib_cq_clean(struct mlx5_ib_cq
*cq
, u32 rsn
, struct mlx5_ib_srq
*srq
)
941 struct mlx5_cqe64
*cqe64
, *dest64
;
950 /* First we need to find the current producer index, so we
951 * know where to start cleaning from. It doesn't matter if HW
952 * adds new entries after this loop -- the QP we're worried
953 * about is already in RESET, so the new entries won't come
954 * from our QP and therefore don't need to be checked.
956 for (prod_index
= cq
->mcq
.cons_index
; get_sw_cqe(cq
, prod_index
); prod_index
++)
957 if (prod_index
== cq
->mcq
.cons_index
+ cq
->ibcq
.cqe
)
960 /* Now sweep backwards through the CQ, removing CQ entries
961 * that match our QP by copying older entries on top of them.
963 while ((int) --prod_index
- (int) cq
->mcq
.cons_index
>= 0) {
964 cqe
= get_cqe(cq
, prod_index
& cq
->ibcq
.cqe
);
965 cqe64
= (cq
->mcq
.cqe_sz
== 64) ? cqe
: cqe
+ 64;
966 if (is_equal_rsn(cqe64
, rsn
)) {
967 if (srq
&& (ntohl(cqe64
->srqn
) & 0xffffff))
968 mlx5_ib_free_srq_wqe(srq
, be16_to_cpu(cqe64
->wqe_counter
));
971 dest
= get_cqe(cq
, (prod_index
+ nfreed
) & cq
->ibcq
.cqe
);
972 dest64
= (cq
->mcq
.cqe_sz
== 64) ? dest
: dest
+ 64;
973 owner_bit
= dest64
->op_own
& MLX5_CQE_OWNER_MASK
;
974 memcpy(dest
, cqe
, cq
->mcq
.cqe_sz
);
975 dest64
->op_own
= owner_bit
|
976 (dest64
->op_own
& ~MLX5_CQE_OWNER_MASK
);
981 cq
->mcq
.cons_index
+= nfreed
;
982 /* Make sure update of buffer contents is done before
983 * updating consumer index.
986 mlx5_cq_set_ci(&cq
->mcq
);
/*
 * Locked wrapper around __mlx5_ib_cq_clean(): takes cq->lock with
 * interrupts disabled, cleans the CQEs belonging to @qpn, and releases the
 * lock.
 *
 * NOTE(review): the embedded numbering jumps from 990 to 995, so statements
 * in front of the lock acquisition are not visible in this listing --
 * confirm against the pristine file.
 */
990 void mlx5_ib_cq_clean(struct mlx5_ib_cq
*cq
, u32 qpn
, struct mlx5_ib_srq
*srq
)
995 spin_lock_irq(&cq
->lock
);
996 __mlx5_ib_cq_clean(cq
, qpn
, srq
);
997 spin_unlock_irq(&cq
->lock
);
/*
 * Change the moderation parameters of a CQ.
 *
 * Requires the cq_moderation capability. Builds a modify-CQ mailbox that
 * selects both the period and the count fields, stores @cq_period and
 * @cq_count in big-endian form and issues mlx5_core_modify_cq(). A warning
 * is logged when the command fails.
 *
 * NOTE(review): the declarations of err/fsel, the returns for the
 * capability and allocation checks, the freeing of the mailbox and the
 * final return are not visible in this listing.
 */
1000 int mlx5_ib_modify_cq(struct ib_cq
*cq
, u16 cq_count
, u16 cq_period
)
1002 struct mlx5_modify_cq_mbox_in
*in
;
1003 struct mlx5_ib_dev
*dev
= to_mdev(cq
->device
);
1004 struct mlx5_ib_cq
*mcq
= to_mcq(cq
);
1008 if (!MLX5_CAP_GEN(dev
->mdev
, cq_moderation
))
1011 in
= kzalloc(sizeof(*in
), GFP_KERNEL
);
1015 in
->cqn
= cpu_to_be32(mcq
->mcq
.cqn
);
1016 fsel
= (MLX5_CQ_MODIFY_PERIOD
| MLX5_CQ_MODIFY_COUNT
);
1017 in
->ctx
.cq_period
= cpu_to_be16(cq_period
);
1018 in
->ctx
.cq_max_count
= cpu_to_be16(cq_count
);
1019 in
->field_select
= cpu_to_be32(fsel
);
1020 err
= mlx5_core_modify_cq(dev
->mdev
, &mcq
->mcq
, in
, sizeof(*in
));
1024 mlx5_ib_warn(dev
, "modify cq 0x%x failed\n", mcq
->mcq
.cqn
);
/*
 * Prepare the user-space buffer for a CQ resize.
 *
 * Copies the resize command from @udata, tests its reserved fields, pins
 * the new user buffer (entries * ucmd.cqe_size bytes) with ib_umem_get(),
 * computes its page layout, and stores the umem in cq->resize_umem and the
 * CQE size in *cqe_size.
 *
 * NOTE(review): the err/npages declarations, the error returns, the
 * IS_ERR() check on umem and the assignment of *npas are not visible in
 * this listing. No overflow check on entries * ucmd.cqe_size is visible
 * either -- confirm against the pristine file.
 */
1029 static int resize_user(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq
*cq
,
1030 int entries
, struct ib_udata
*udata
, int *npas
,
1031 int *page_shift
, int *cqe_size
)
1033 struct mlx5_ib_resize_cq ucmd
;
1034 struct ib_umem
*umem
;
1037 struct ib_ucontext
*context
= cq
->buf
.umem
->context
;
1039 err
= ib_copy_from_udata(&ucmd
, udata
, sizeof(ucmd
));
1043 if (ucmd
.reserved0
|| ucmd
.reserved1
)
1046 umem
= ib_umem_get(context
, ucmd
.buf_addr
, entries
* ucmd
.cqe_size
,
1047 IB_ACCESS_LOCAL_WRITE
, 1);
1049 err
= PTR_ERR(umem
);
1053 mlx5_ib_cont_pages(umem
, ucmd
.buf_addr
, &npages
, page_shift
,
1056 cq
->resize_umem
= umem
;
1057 *cqe_size
= ucmd
.cqe_size
;
1062 static void un_resize_user(struct mlx5_ib_cq
*cq
)
1064 ib_umem_release(cq
->resize_umem
);
/*
 * Prepare the kernel buffer for a CQ resize: allocate a zeroed buffer
 * descriptor into cq->resize_buf, allocate CQE storage for @entries entries
 * of @cqe_size bytes and mark every entry invalid with init_cq_buf(). The
 * trailing kfree() releases the descriptor.
 *
 * NOTE(review): the err declaration, the error checks/returns and the goto
 * label in front of the kfree() are not visible in this listing.
 */
1067 static int resize_kernel(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq
*cq
,
1068 int entries
, int cqe_size
)
1072 cq
->resize_buf
= kzalloc(sizeof(*cq
->resize_buf
), GFP_KERNEL
);
1073 if (!cq
->resize_buf
)
1076 err
= alloc_cq_buf(dev
, cq
->resize_buf
, entries
, cqe_size
);
1080 init_cq_buf(cq
, cq
->resize_buf
);
1085 kfree(cq
->resize_buf
);
1089 static void un_resize_kernel(struct mlx5_ib_dev
*dev
, struct mlx5_ib_cq
*cq
)
1091 free_cq_buf(dev
, cq
->resize_buf
);
1092 cq
->resize_buf
= NULL
;
/*
 * Copy outstanding CQEs from the current buffer into cq->resize_buf.
 *
 * Source and destination CQE sizes must match, otherwise a warning is
 * logged. Starting at the consumer index, entries are copied until a CQE
 * with opcode MLX5_CQE_RESIZE_CQ is reached. Each entry lands at index
 * (i + 1) masked with resize_buf->nent and gets the owner bit computed by
 * sw_ownership_bit(i + 1, nent). The loop warns when a source entry is not
 * in software ownership or when it wraps back to the starting entry without
 * meeting the resize CQE. The consumer index is advanced once at the end.
 *
 * NOTE(review): local declarations, the increment of i, the NULL tests on
 * scqe, the start_cqe assignment and every return statement are not visible
 * in this listing -- confirm against the pristine file.
 */
1095 static int copy_resize_cqes(struct mlx5_ib_cq
*cq
)
1097 struct mlx5_ib_dev
*dev
= to_mdev(cq
->ibcq
.device
);
1098 struct mlx5_cqe64
*scqe64
;
1099 struct mlx5_cqe64
*dcqe64
;
1108 ssize
= cq
->buf
.cqe_size
;
1109 dsize
= cq
->resize_buf
->cqe_size
;
1110 if (ssize
!= dsize
) {
1111 mlx5_ib_warn(dev
, "resize from different cqe size is not supported\n");
1115 i
= cq
->mcq
.cons_index
;
1116 scqe
= get_sw_cqe(cq
, i
);
1117 scqe64
= ssize
== 64 ? scqe
: scqe
+ 64;
1120 mlx5_ib_warn(dev
, "expected cqe in sw ownership\n");
1124 while ((scqe64
->op_own
>> 4) != MLX5_CQE_RESIZE_CQ
) {
1125 dcqe
= get_cqe_from_buf(cq
->resize_buf
,
1126 (i
+ 1) & (cq
->resize_buf
->nent
),
1128 dcqe64
= dsize
== 64 ? dcqe
: dcqe
+ 64;
1129 sw_own
= sw_ownership_bit(i
+ 1, cq
->resize_buf
->nent
);
1130 memcpy(dcqe
, scqe
, dsize
);
1131 dcqe64
->op_own
= (dcqe64
->op_own
& ~MLX5_CQE_OWNER_MASK
) | sw_own
;
1134 scqe
= get_sw_cqe(cq
, i
);
1135 scqe64
= ssize
== 64 ? scqe
: scqe
+ 64;
1137 mlx5_ib_warn(dev
, "expected cqe in sw ownership\n");
1141 if (scqe
== start_cqe
) {
1142 pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
1147 ++cq
->mcq
.cons_index
;
/*
 * Resize a CQ to hold at least @entries entries.
 *
 * Visible steps: require the cq_resize capability, round the size up to a
 * power of two of at least entries + 1 and check it against the device
 * limit, compare it with the current size (ibcq->cqe + 1), then under
 * cq->resize_mutex prepare the new buffer with resize_user() or
 * resize_kernel(), build a modify-CQ mailbox (page list, log size, page
 * size, CQE size, opmod MLX5_CQ_OPMOD_RESIZE) and issue
 * mlx5_core_modify_cq().
 * On success the user path swaps cq->buf.umem for resize_umem; the kernel
 * path, under cq->lock, copies outstanding CQEs with copy_resize_cqes(),
 * switches cq->buf to the new buffer and afterwards frees a buffer held in
 * the local tbuf. The trailing calls are the unwind path.
 *
 * NOTE(review): the user/kernel conditions, local declarations, how tbuf is
 * filled, the error checks, the mailbox free, goto labels and return
 * statements are not visible in this listing. The visible size check
 * compares against (1 << log_max_cq_sz) + 1, while mlx5_ib_create_cq()
 * compares against (1 << log_max_cq_sz) -- confirm which bound is intended.
 */
1151 int mlx5_ib_resize_cq(struct ib_cq
*ibcq
, int entries
, struct ib_udata
*udata
)
1153 struct mlx5_ib_dev
*dev
= to_mdev(ibcq
->device
);
1154 struct mlx5_ib_cq
*cq
= to_mcq(ibcq
);
1155 struct mlx5_modify_cq_mbox_in
*in
;
1160 int uninitialized_var(cqe_size
);
1161 unsigned long flags
;
1163 if (!MLX5_CAP_GEN(dev
->mdev
, cq_resize
)) {
1164 pr_info("Firmware does not support resize CQ\n");
1171 entries
= roundup_pow_of_two(entries
+ 1);
1172 if (entries
> (1 << MLX5_CAP_GEN(dev
->mdev
, log_max_cq_sz
)) + 1)
1175 if (entries
== ibcq
->cqe
+ 1)
1178 mutex_lock(&cq
->resize_mutex
);
1180 err
= resize_user(dev
, cq
, entries
, udata
, &npas
, &page_shift
,
1184 err
= resize_kernel(dev
, cq
, entries
, cqe_size
);
1186 npas
= cq
->resize_buf
->buf
.npages
;
1187 page_shift
= cq
->resize_buf
->buf
.page_shift
;
1194 inlen
= sizeof(*in
) + npas
* sizeof(in
->pas
[0]);
1195 in
= mlx5_vzalloc(inlen
);
1202 mlx5_ib_populate_pas(dev
, cq
->resize_umem
, page_shift
,
1205 mlx5_fill_page_array(&cq
->resize_buf
->buf
, in
->pas
);
1207 in
->field_select
= cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE
|
1208 MLX5_MODIFY_CQ_MASK_PG_OFFSET
|
1209 MLX5_MODIFY_CQ_MASK_PG_SIZE
);
1210 in
->ctx
.log_pg_sz
= page_shift
- MLX5_ADAPTER_PAGE_SHIFT
;
1211 in
->ctx
.cqe_sz_flags
= cqe_sz_to_mlx_sz(cqe_size
) << 5;
1212 in
->ctx
.page_offset
= 0;
1213 in
->ctx
.log_sz_usr_page
= cpu_to_be32(ilog2(entries
) << 24);
1214 in
->hdr
.opmod
= cpu_to_be16(MLX5_CQ_OPMOD_RESIZE
);
1215 in
->cqn
= cpu_to_be32(cq
->mcq
.cqn
);
1217 err
= mlx5_core_modify_cq(dev
->mdev
, &cq
->mcq
, in
, inlen
);
1222 cq
->ibcq
.cqe
= entries
- 1;
1223 ib_umem_release(cq
->buf
.umem
);
1224 cq
->buf
.umem
= cq
->resize_umem
;
1225 cq
->resize_umem
= NULL
;
1227 struct mlx5_ib_cq_buf tbuf
;
1230 spin_lock_irqsave(&cq
->lock
, flags
);
1231 if (cq
->resize_buf
) {
1232 err
= copy_resize_cqes(cq
);
1235 cq
->buf
= *cq
->resize_buf
;
1236 kfree(cq
->resize_buf
);
1237 cq
->resize_buf
= NULL
;
1241 cq
->ibcq
.cqe
= entries
- 1;
1242 spin_unlock_irqrestore(&cq
->lock
, flags
);
1244 free_cq_buf(dev
, &tbuf
);
1246 mutex_unlock(&cq
->resize_mutex
);
1258 un_resize_kernel(dev
, cq
);
1260 mutex_unlock(&cq
->resize_mutex
);
/*
 * Return the CQE size recorded in the mlx5_ib_cq (cq->cqe_size).
 *
 * NOTE(review): the statements that derive cq from @ibcq (and any handling
 * of a NULL @ibcq) are not visible in this listing.
 */
1264 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev
*dev
, struct ib_cq
*ibcq
)
1266 struct mlx5_ib_cq
*cq
;
1272 return cq
->cqe_size
;
/*
 * Queue a software-generated work completion on @ibcq.
 *
 * Allocates a list node with GFP_ATOMIC and, under cq->lock, appends it to
 * cq->wc_list. When the CQ is armed for the next completion
 * (notify_flags == IB_CQ_NEXT_COMP) or the completion status is not
 * IB_WC_SUCCESS, notify_flags is cleared and notify_work is scheduled.
 *
 * NOTE(review): the allocation-failure check, the copy of *wc into the node
 * and the return statement are not visible in this listing.
 */
1275 /* Called from atomic context */
1276 int mlx5_ib_generate_wc(struct ib_cq
*ibcq
, struct ib_wc
*wc
)
1278 struct mlx5_ib_wc
*soft_wc
;
1279 struct mlx5_ib_cq
*cq
= to_mcq(ibcq
);
1280 unsigned long flags
;
1282 soft_wc
= kmalloc(sizeof(*soft_wc
), GFP_ATOMIC
);
1287 spin_lock_irqsave(&cq
->lock
, flags
);
1288 list_add_tail(&soft_wc
->list
, &cq
->wc_list
);
1289 if (cq
->notify_flags
== IB_CQ_NEXT_COMP
||
1290 wc
->status
!= IB_WC_SUCCESS
) {
1291 cq
->notify_flags
= 0;
1292 schedule_work(&cq
->notify_work
);
1294 spin_unlock_irqrestore(&cq
->lock
, flags
);