/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33 #include <linux/mlx4/cq.h>
34 #include <linux/mlx4/qp.h>
39 static void mlx4_ib_cq_comp(struct mlx4_cq
*cq
)
41 struct ib_cq
*ibcq
= &to_mibcq(cq
)->ibcq
;
42 ibcq
->comp_handler(ibcq
, ibcq
->cq_context
);
/*
 * mlx4_ib_cq_event - asynchronous event callback for the low-level mlx4 CQ.
 *
 * @cq:   mlx4 CQ the event was raised on
 * @type: event type reported for that CQ
 *
 * Any type other than MLX4_EVENT_TYPE_CQ_ERROR is logged at KERN_WARNING
 * together with the CQ number.  For a CQ error, if an event_handler is
 * set on the ib_cq, an ib_event with event = IB_EVENT_CQ_ERR,
 * device = ibcq->device and element.cq = ibcq is filled in on the stack
 * and handed to that handler along with ibcq->cq_context.
 *
 * NOTE(review): the declaration of ibcq and whatever follows the warning
 * printk inside the type check are not visible in this listing; the
 * warning branch presumably returns early -- confirm against the full
 * source.
 */
45 static void mlx4_ib_cq_event(struct mlx4_cq
*cq
, enum mlx4_event type
)
47 struct ib_event event
;
50 if (type
!= MLX4_EVENT_TYPE_CQ_ERROR
) {
51 printk(KERN_WARNING
"mlx4_ib: Unexpected event type %d "
52 "on CQ %06x\n", type
, cq
->cqn
);
56 ibcq
= &to_mibcq(cq
)->ibcq
;
57 if (ibcq
->event_handler
) {
58 event
.device
= ibcq
->device
;
59 event
.event
= IB_EVENT_CQ_ERR
;
60 event
.element
.cq
= ibcq
;
61 ibcq
->event_handler(&event
, ibcq
->cq_context
);
65 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf
*buf
, int n
)
67 return mlx4_buf_offset(&buf
->buf
, n
* sizeof (struct mlx4_cqe
));
70 static void *get_cqe(struct mlx4_ib_cq
*cq
, int n
)
72 return get_cqe_from_buf(&cq
->buf
, n
);
75 static void *get_sw_cqe(struct mlx4_ib_cq
*cq
, int n
)
77 struct mlx4_cqe
*cqe
= get_cqe(cq
, n
& cq
->ibcq
.cqe
);
79 return (!!(cqe
->owner_sr_opcode
& MLX4_CQE_OWNER_MASK
) ^
80 !!(n
& (cq
->ibcq
.cqe
+ 1))) ? NULL
: cqe
;
83 static struct mlx4_cqe
*next_cqe_sw(struct mlx4_ib_cq
*cq
)
85 return get_sw_cqe(cq
, cq
->mcq
.cons_index
);
/*
 * mlx4_ib_create_cq - create a completion queue on @ibdev.
 *
 * @ibdev:   IB device the CQ is created on
 * @entries: requested number of CQEs; ERR_PTR(-EINVAL) is returned when
 *           it is < 1 or greater than dev->dev->caps.max_cqes
 * @vector:  not referenced in the visible part of this function
 * @context: user context, used by the userspace buffer and doorbell calls
 * @udata:   user command/response channel (ib_copy_from_udata() and
 *           ib_copy_to_udata())
 *
 * The size is rounded with roundup_pow_of_two(entries + 1) and
 * cq->ibcq.cqe is set to that value minus one; buf_size is the rounded
 * entry count times sizeof(struct mlx4_cqe).
 *
 * Two setup sequences are visible:
 *  - userspace resources: the create command is copied from @udata, the
 *    buffer at ucmd.buf_addr is obtained with ib_umem_get(), an MTT is
 *    initialised and written from the umem, the doorbell at ucmd.db_addr
 *    is mapped with mlx4_ib_db_map_user(), and the UAR comes from the
 *    user context;
 *  - kernel resources: a doorbell record is allocated with
 *    mlx4_ib_db_alloc(), set_ci_db points at cq->db.db and arm_db at the
 *    element after it, the CQE buffer comes from mlx4_buf_alloc(), the
 *    MTT is initialised and written from that buffer, and dev->priv_uar
 *    is used.
 *
 * mlx4_cq_alloc() is then called with the MTT, UAR and doorbell DMA
 * address, the completion and event callbacks are installed on cq->mcq,
 * and the CQ number is copied out through @udata.  The calls that follow
 * (doorbell unmap, MTT cleanup, umem release / buffer free, doorbell
 * free) look like the error unwind sequence.
 *
 * NOTE(review): the declarations of buf_size, err and uar, the conditions
 * selecting between the two sequences, the checks after each call
 * (including the one guarding ERR_PTR(-ENOMEM) after kmalloc), the unwind
 * labels and the final return are not visible in this listing.
 * Presumably the userspace sequence runs when @context is non-NULL --
 * confirm against the full source.
 */
88 struct ib_cq
*mlx4_ib_create_cq(struct ib_device
*ibdev
, int entries
, int vector
,
89 struct ib_ucontext
*context
,
90 struct ib_udata
*udata
)
92 struct mlx4_ib_dev
*dev
= to_mdev(ibdev
);
93 struct mlx4_ib_cq
*cq
;
98 if (entries
< 1 || entries
> dev
->dev
->caps
.max_cqes
)
99 return ERR_PTR(-EINVAL
);
101 cq
= kmalloc(sizeof *cq
, GFP_KERNEL
);
103 return ERR_PTR(-ENOMEM
);
105 entries
= roundup_pow_of_two(entries
+ 1);
106 cq
->ibcq
.cqe
= entries
- 1;
107 buf_size
= entries
* sizeof (struct mlx4_cqe
);
108 spin_lock_init(&cq
->lock
);
111 struct mlx4_ib_create_cq ucmd
;
113 if (ib_copy_from_udata(&ucmd
, udata
, sizeof ucmd
)) {
118 cq
->umem
= ib_umem_get(context
, ucmd
.buf_addr
, buf_size
,
119 IB_ACCESS_LOCAL_WRITE
);
120 if (IS_ERR(cq
->umem
)) {
121 err
= PTR_ERR(cq
->umem
);
125 err
= mlx4_mtt_init(dev
->dev
, ib_umem_page_count(cq
->umem
),
126 ilog2(cq
->umem
->page_size
), &cq
->buf
.mtt
);
130 err
= mlx4_ib_umem_write_mtt(dev
, &cq
->buf
.mtt
, cq
->umem
);
134 err
= mlx4_ib_db_map_user(to_mucontext(context
), ucmd
.db_addr
,
139 uar
= &to_mucontext(context
)->uar
;
141 err
= mlx4_ib_db_alloc(dev
, &cq
->db
, 1);
145 cq
->mcq
.set_ci_db
= cq
->db
.db
;
146 cq
->mcq
.arm_db
= cq
->db
.db
+ 1;
147 *cq
->mcq
.set_ci_db
= 0;
150 if (mlx4_buf_alloc(dev
->dev
, buf_size
, PAGE_SIZE
* 2, &cq
->buf
.buf
)) {
155 err
= mlx4_mtt_init(dev
->dev
, cq
->buf
.buf
.npages
, cq
->buf
.buf
.page_shift
,
160 err
= mlx4_buf_write_mtt(dev
->dev
, &cq
->buf
.mtt
, &cq
->buf
.buf
);
164 uar
= &dev
->priv_uar
;
167 err
= mlx4_cq_alloc(dev
->dev
, entries
, &cq
->buf
.mtt
, uar
,
168 cq
->db
.dma
, &cq
->mcq
);
172 cq
->mcq
.comp
= mlx4_ib_cq_comp
;
173 cq
->mcq
.event
= mlx4_ib_cq_event
;
176 if (ib_copy_to_udata(udata
, &cq
->mcq
.cqn
, sizeof (__u32
))) {
185 mlx4_ib_db_unmap_user(to_mucontext(context
), &cq
->db
);
188 mlx4_mtt_cleanup(dev
->dev
, &cq
->buf
.mtt
);
192 ib_umem_release(cq
->umem
);
194 mlx4_buf_free(dev
->dev
, entries
* sizeof (struct mlx4_cqe
),
199 mlx4_ib_db_free(dev
, &cq
->db
);
/*
 * mlx4_ib_destroy_cq - tear down a completion queue.
 *
 * @cq: ib_cq to destroy
 *
 * Frees the hardware CQ with mlx4_cq_free() and cleans up the buffer MTT.
 * The remaining calls release either the userspace resources (doorbell
 * unmap through cq->uobject->context, then ib_umem_release()) or the
 * kernel ones (mlx4_buf_free() sized as (cq->cqe + 1) CQEs, then
 * mlx4_ib_db_free()).
 *
 * NOTE(review): the condition choosing between the two release paths, the
 * freeing of the driver CQ structure and the return value are not visible
 * in this listing; confirm against the full source.
 */
207 int mlx4_ib_destroy_cq(struct ib_cq
*cq
)
209 struct mlx4_ib_dev
*dev
= to_mdev(cq
->device
);
210 struct mlx4_ib_cq
*mcq
= to_mcq(cq
);
212 mlx4_cq_free(dev
->dev
, &mcq
->mcq
);
213 mlx4_mtt_cleanup(dev
->dev
, &mcq
->buf
.mtt
);
216 mlx4_ib_db_unmap_user(to_mucontext(cq
->uobject
->context
), &mcq
->db
);
217 ib_umem_release(mcq
->umem
);
219 mlx4_buf_free(dev
->dev
, (cq
->cqe
+ 1) * sizeof (struct mlx4_cqe
),
221 mlx4_ib_db_free(dev
, &mcq
->db
);
/*
 * dump_cqe - print eight 32-bit words of a CQE at KERN_DEBUG.
 *
 * @cqe: CQE to dump
 *
 * Words buf[0] through buf[7] are each converted with be32_to_cpu() and
 * printed as eight-digit hex values on a single line.
 *
 * NOTE(review): the declaration of buf is not visible in this listing;
 * presumably it is a big-endian 32-bit pointer aliasing @cqe -- confirm.
 */
229 static void dump_cqe(void *cqe
)
233 printk(KERN_DEBUG
"CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
234 be32_to_cpu(buf
[0]), be32_to_cpu(buf
[1]), be32_to_cpu(buf
[2]),
235 be32_to_cpu(buf
[3]), be32_to_cpu(buf
[4]), be32_to_cpu(buf
[5]),
236 be32_to_cpu(buf
[6]), be32_to_cpu(buf
[7]));
/*
 * mlx4_ib_handle_error_cqe - translate an error CQE into a work
 * completion status.
 *
 * @cqe: error CQE to decode
 *
 * When the syndrome is MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR a KERN_DEBUG
 * message is printed with the QPN, the WQE index, the vendor syndrome and
 * owner_sr_opcode with the owner bit masked off.
 *
 * The switch on cqe->syndrome assigns the IB_WC_*_ERR status that
 * corresponds to each MLX4_CQE_SYNDROME_* value; IB_WC_GENERAL_ERR is the
 * one assignment without a visible case label.  wc->vendor_err is set
 * from cqe->vendor_err_syndrome.
 *
 * NOTE(review): the second parameter (used here as wc), the break
 * statements and the label in front of the IB_WC_GENERAL_ERR assignment
 * are not visible in this listing; presumably each case breaks and the
 * last assignment is the default -- confirm against the full source.
 */
239 static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe
*cqe
,
242 if (cqe
->syndrome
== MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR
) {
243 printk(KERN_DEBUG
"local QP operation err "
244 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
246 be32_to_cpu(cqe
->my_qpn
), be16_to_cpu(cqe
->wqe_index
),
247 cqe
->vendor_err_syndrome
,
248 cqe
->owner_sr_opcode
& ~MLX4_CQE_OWNER_MASK
);
252 switch (cqe
->syndrome
) {
253 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR
:
254 wc
->status
= IB_WC_LOC_LEN_ERR
;
256 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR
:
257 wc
->status
= IB_WC_LOC_QP_OP_ERR
;
259 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR
:
260 wc
->status
= IB_WC_LOC_PROT_ERR
;
262 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR
:
263 wc
->status
= IB_WC_WR_FLUSH_ERR
;
265 case MLX4_CQE_SYNDROME_MW_BIND_ERR
:
266 wc
->status
= IB_WC_MW_BIND_ERR
;
268 case MLX4_CQE_SYNDROME_BAD_RESP_ERR
:
269 wc
->status
= IB_WC_BAD_RESP_ERR
;
271 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR
:
272 wc
->status
= IB_WC_LOC_ACCESS_ERR
;
274 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR
:
275 wc
->status
= IB_WC_REM_INV_REQ_ERR
;
277 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR
:
278 wc
->status
= IB_WC_REM_ACCESS_ERR
;
280 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR
:
281 wc
->status
= IB_WC_REM_OP_ERR
;
283 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR
:
284 wc
->status
= IB_WC_RETRY_EXC_ERR
;
286 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR
:
287 wc
->status
= IB_WC_RNR_RETRY_EXC_ERR
;
289 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR
:
290 wc
->status
= IB_WC_REM_ABORT_ERR
;
293 wc
->status
= IB_WC_GENERAL_ERR
;
297 wc
->vendor_err
= cqe
->vendor_err_syndrome
;
/*
 * mlx4_ib_poll_one - consume one CQE from @cq and fill in a work
 * completion.
 *
 * @cq:     CQ being polled
 * @cur_qp: in/out pointer to the QP the caller last saw; it is replaced
 *          with the result of __mlx4_qp_lookup() when the low 24 bits of
 *          the CQE my_qpn differ from (*cur_qp)->mqp.qpn
 *
 * Visible steps, in order:
 *  - fetch the CQE at the consumer index with next_cqe_sw() and increment
 *    cq->mcq.cons_index;
 *  - derive is_send (MLX4_CQE_IS_SEND_MASK) and is_error (opcode equals
 *    MLX4_CQE_OPCODE_ERROR) from owner_sr_opcode;
 *  - print a warning for a completion with MLX4_OPCODE_NOP (the second
 *    half of that condition is not visible);
 *  - look the QP up if needed, warning with the CQ number and QPN when
 *    the lookup returns NULL, and set wc->qp;
 *  - choose wc->wr_id: from wq->wrid[] indexed by tail masked with
 *    (wqe_cnt - 1), or for an SRQ from srq->wrid[wqe_index] followed by
 *    mlx4_ib_free_srq_wqe().  When sq_signal_bits is zero, wq->tail is
 *    first advanced to the CQE wqe_index using 16-bit arithmetic;
 *  - on is_error call mlx4_ib_handle_error_cqe(); otherwise set
 *    IB_WC_SUCCESS and decode the opcode in one switch for the
 *    MLX4_OPCODE_* values and another for the MLX4_RECV_OPCODE_* values,
 *    setting opcode, wc_flags, byte_len and imm_data as shown;
 *  - after the receive switch, fill slid, sl (upper four bits of
 *    cqe->sl), src_qp (low 24 bits of g_mlpath_rqpn), dlid_path_bits
 *    (bits 24-30), IB_WC_GRH when bit 31 is set, and pkey_index (low
 *    seven bits of immed_rss_invalid).
 *
 * NOTE(review): the third parameter (used here as wc), several local
 * declarations, the NULL check after next_cqe_sw(), the memory barrier
 * the in-function comment refers to, the branch conditions on is_send,
 * the tail increments, break statements and every return statement are
 * not visible in this listing; confirm against the full source before
 * relying on any ordering not listed above.
 */
300 static int mlx4_ib_poll_one(struct mlx4_ib_cq
*cq
,
301 struct mlx4_ib_qp
**cur_qp
,
304 struct mlx4_cqe
*cqe
;
306 struct mlx4_ib_wq
*wq
;
307 struct mlx4_ib_srq
*srq
;
313 cqe
= next_cqe_sw(cq
);
317 ++cq
->mcq
.cons_index
;
320 * Make sure we read CQ entry contents after we've checked the
325 is_send
= cqe
->owner_sr_opcode
& MLX4_CQE_IS_SEND_MASK
;
326 is_error
= (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) ==
327 MLX4_CQE_OPCODE_ERROR
;
329 if (unlikely((cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) == MLX4_OPCODE_NOP
&&
331 printk(KERN_WARNING
"Completion for NOP opcode detected!\n");
336 (be32_to_cpu(cqe
->my_qpn
) & 0xffffff) != (*cur_qp
)->mqp
.qpn
) {
338 * We do not have to take the QP table lock here,
339 * because CQs will be locked while QPs are removed
342 mqp
= __mlx4_qp_lookup(to_mdev(cq
->ibcq
.device
)->dev
,
343 be32_to_cpu(cqe
->my_qpn
));
344 if (unlikely(!mqp
)) {
345 printk(KERN_WARNING
"CQ %06x with entry for unknown QPN %06x\n",
346 cq
->mcq
.cqn
, be32_to_cpu(cqe
->my_qpn
) & 0xffffff);
350 *cur_qp
= to_mibqp(mqp
);
353 wc
->qp
= &(*cur_qp
)->ibqp
;
357 if (!(*cur_qp
)->sq_signal_bits
) {
358 wqe_ctr
= be16_to_cpu(cqe
->wqe_index
);
359 wq
->tail
+= (u16
) (wqe_ctr
- (u16
) wq
->tail
);
361 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
363 } else if ((*cur_qp
)->ibqp
.srq
) {
364 srq
= to_msrq((*cur_qp
)->ibqp
.srq
);
365 wqe_ctr
= be16_to_cpu(cqe
->wqe_index
);
366 wc
->wr_id
= srq
->wrid
[wqe_ctr
];
367 mlx4_ib_free_srq_wqe(srq
, wqe_ctr
);
370 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
374 if (unlikely(is_error
)) {
375 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe
*) cqe
, wc
);
379 wc
->status
= IB_WC_SUCCESS
;
383 switch (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) {
384 case MLX4_OPCODE_RDMA_WRITE_IMM
:
385 wc
->wc_flags
|= IB_WC_WITH_IMM
;
386 case MLX4_OPCODE_RDMA_WRITE
:
387 wc
->opcode
= IB_WC_RDMA_WRITE
;
389 case MLX4_OPCODE_SEND_IMM
:
390 wc
->wc_flags
|= IB_WC_WITH_IMM
;
391 case MLX4_OPCODE_SEND
:
392 wc
->opcode
= IB_WC_SEND
;
394 case MLX4_OPCODE_RDMA_READ
:
395 wc
->opcode
= IB_WC_RDMA_READ
;
396 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
398 case MLX4_OPCODE_ATOMIC_CS
:
399 wc
->opcode
= IB_WC_COMP_SWAP
;
402 case MLX4_OPCODE_ATOMIC_FA
:
403 wc
->opcode
= IB_WC_FETCH_ADD
;
406 case MLX4_OPCODE_BIND_MW
:
407 wc
->opcode
= IB_WC_BIND_MW
;
411 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
413 switch (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) {
414 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM
:
415 wc
->opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
416 wc
->wc_flags
= IB_WC_WITH_IMM
;
417 wc
->imm_data
= cqe
->immed_rss_invalid
;
419 case MLX4_RECV_OPCODE_SEND
:
420 wc
->opcode
= IB_WC_RECV
;
423 case MLX4_RECV_OPCODE_SEND_IMM
:
424 wc
->opcode
= IB_WC_RECV
;
425 wc
->wc_flags
= IB_WC_WITH_IMM
;
426 wc
->imm_data
= cqe
->immed_rss_invalid
;
430 wc
->slid
= be16_to_cpu(cqe
->rlid
);
431 wc
->sl
= cqe
->sl
>> 4;
432 g_mlpath_rqpn
= be32_to_cpu(cqe
->g_mlpath_rqpn
);
433 wc
->src_qp
= g_mlpath_rqpn
& 0xffffff;
434 wc
->dlid_path_bits
= (g_mlpath_rqpn
>> 24) & 0x7f;
435 wc
->wc_flags
|= g_mlpath_rqpn
& 0x80000000 ? IB_WC_GRH
: 0;
436 wc
->pkey_index
= be32_to_cpu(cqe
->immed_rss_invalid
) & 0x7f;
/*
 * mlx4_ib_poll_cq - poll up to @num_entries completions from @ibcq.
 *
 * @ibcq:        CQ to poll
 * @num_entries: capacity of the @wc array
 * @wc:          array receiving the work completions; slot wc + npolled
 *               is handed to each mlx4_ib_poll_one() call
 *
 * cq->lock is held with spin_lock_irqsave() across the polling loop and
 * the mlx4_cq_set_ci() call that follows it.  cur_qp starts out NULL and
 * is passed by address to every mlx4_ib_poll_one() call.
 *
 * NOTE(review): the declarations of npolled, err and flags, the loop exit
 * on a non-zero err, any condition around mlx4_cq_set_ci() and the return
 * statements are not visible in this listing.  The final test treats
 * err == 0 and err == -EAGAIN alike, presumably returning the number of
 * completions polled in that case and err otherwise -- confirm against
 * the full source.
 */
442 int mlx4_ib_poll_cq(struct ib_cq
*ibcq
, int num_entries
, struct ib_wc
*wc
)
444 struct mlx4_ib_cq
*cq
= to_mcq(ibcq
);
445 struct mlx4_ib_qp
*cur_qp
= NULL
;
450 spin_lock_irqsave(&cq
->lock
, flags
);
452 for (npolled
= 0; npolled
< num_entries
; ++npolled
) {
453 err
= mlx4_ib_poll_one(cq
, &cur_qp
, wc
+ npolled
);
459 mlx4_cq_set_ci(&cq
->mcq
);
461 spin_unlock_irqrestore(&cq
->lock
, flags
);
463 if (err
== 0 || err
== -EAGAIN
)
/*
 * mlx4_ib_arm_cq - request a completion notification on @ibcq.
 *
 * @ibcq:  CQ to arm
 * @flags: notification flags; when (flags & IB_CQ_SOLICITED_MASK) equals
 *         IB_CQ_SOLICITED the command passed on is MLX4_CQ_DB_REQ_NOT_SOL,
 *         otherwise MLX4_CQ_DB_REQ_NOT
 *
 * Calls mlx4_cq_arm() on the underlying mlx4 CQ with the device uar_map
 * and MLX4_GET_DOORBELL_LOCK() applied to the device uar_lock.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
469 int mlx4_ib_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags flags
)
471 mlx4_cq_arm(&to_mcq(ibcq
)->mcq
,
472 (flags
& IB_CQ_SOLICITED_MASK
) == IB_CQ_SOLICITED
?
473 MLX4_CQ_DB_REQ_NOT_SOL
: MLX4_CQ_DB_REQ_NOT
,
474 to_mdev(ibcq
->device
)->uar_map
,
475 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq
->device
)->uar_lock
));
/*
 * __mlx4_ib_cq_clean - remove the CQEs that belong to QP @qpn from @cq.
 *
 * @cq:  CQ to clean
 * @qpn: QP number to match against the low 24 bits of each CQE my_qpn
 * @srq: if non-NULL, a matching CQE without MLX4_CQE_IS_SEND_MASK set has
 *       its wqe_index handed to mlx4_ib_free_srq_wqe()
 *
 * Pass one walks forward from cq->mcq.cons_index for as long as
 * get_sw_cqe() returns an entry, with a check against
 * cons_index + cq->ibcq.cqe, to find the producer index.  Pass two walks
 * backwards from there to the consumer index.  CQEs that are kept are
 * copied nfreed slots towards the producer with memcpy(), and the owner
 * bit that was in the destination slot is put back after the copy.
 * Finally cons_index is advanced by nfreed and published with
 * mlx4_cq_set_ci().
 *
 * No locking is visible in this function; mlx4_ib_cq_clean() calls it
 * with cq->lock held.
 *
 * NOTE(review): the declarations of prod_index, nfreed and owner_bit, the
 * statement under the pass-one limit check, the nfreed increment, the
 * condition guarding the copy branch and the tail update, and the barrier
 * that the last in-function comment refers to are not visible in this
 * listing; confirm against the full source.
 */
480 void __mlx4_ib_cq_clean(struct mlx4_ib_cq
*cq
, u32 qpn
, struct mlx4_ib_srq
*srq
)
484 struct mlx4_cqe
*cqe
, *dest
;
488 * First we need to find the current producer index, so we
489 * know where to start cleaning from. It doesn't matter if HW
490 * adds new entries after this loop -- the QP we're worried
491 * about is already in RESET, so the new entries won't come
492 * from our QP and therefore don't need to be checked.
494 for (prod_index
= cq
->mcq
.cons_index
; get_sw_cqe(cq
, prod_index
); ++prod_index
)
495 if (prod_index
== cq
->mcq
.cons_index
+ cq
->ibcq
.cqe
)
499 * Now sweep backwards through the CQ, removing CQ entries
500 * that match our QP by copying older entries on top of them.
502 while ((int) --prod_index
- (int) cq
->mcq
.cons_index
>= 0) {
503 cqe
= get_cqe(cq
, prod_index
& cq
->ibcq
.cqe
);
504 if ((be32_to_cpu(cqe
->my_qpn
) & 0xffffff) == qpn
) {
505 if (srq
&& !(cqe
->owner_sr_opcode
& MLX4_CQE_IS_SEND_MASK
))
506 mlx4_ib_free_srq_wqe(srq
, be16_to_cpu(cqe
->wqe_index
));
509 dest
= get_cqe(cq
, (prod_index
+ nfreed
) & cq
->ibcq
.cqe
);
510 owner_bit
= dest
->owner_sr_opcode
& MLX4_CQE_OWNER_MASK
;
511 memcpy(dest
, cqe
, sizeof *cqe
);
512 dest
->owner_sr_opcode
= owner_bit
|
513 (dest
->owner_sr_opcode
& ~MLX4_CQE_OWNER_MASK
);
518 cq
->mcq
.cons_index
+= nfreed
;
520 * Make sure update of buffer contents is done before
521 * updating consumer index.
524 mlx4_cq_set_ci(&cq
->mcq
);
528 void mlx4_ib_cq_clean(struct mlx4_ib_cq
*cq
, u32 qpn
, struct mlx4_ib_srq
*srq
)
530 spin_lock_irq(&cq
->lock
);
531 __mlx4_ib_cq_clean(cq
, qpn
, srq
);
532 spin_unlock_irq(&cq
->lock
);