/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
33 #include <linux/mlx4/cq.h>
34 #include <linux/mlx4/qp.h>
39 static void mlx4_ib_cq_comp(struct mlx4_cq
*cq
)
41 struct ib_cq
*ibcq
= &to_mibcq(cq
)->ibcq
;
42 ibcq
->comp_handler(ibcq
, ibcq
->cq_context
);
/*
 * Asynchronous event callback for a low-level mlx4 CQ.
 *
 * An event whose type is not MLX4_EVENT_TYPE_CQ_ERROR is reported with
 * printk(KERN_WARNING) together with the CQ number.  The remaining code
 * obtains the ib_cq through to_mibcq() and, when the consumer registered an
 * event_handler, fills an ib_event (device, IB_EVENT_CQ_ERR, element.cq) and
 * passes it to that handler along with cq_context.
 *
 * NOTE(review): this listing lacks the function's braces, the declaration
 * of ibcq, and whatever follows the warning (presumably an early return) --
 * confirm against the complete source.
 */
45 static void mlx4_ib_cq_event(struct mlx4_cq
*cq
, enum mlx4_event type
)
47 struct ib_event event
;
50 if (type
!= MLX4_EVENT_TYPE_CQ_ERROR
) {
51 printk(KERN_WARNING
"mlx4_ib: Unexpected event type %d "
52 "on CQ %06x\n", type
, cq
->cqn
);
56 ibcq
= &to_mibcq(cq
)->ibcq
;
57 if (ibcq
->event_handler
) {
58 event
.device
= ibcq
->device
;
59 event
.event
= IB_EVENT_CQ_ERR
;
60 event
.element
.cq
= ibcq
;
61 ibcq
->event_handler(&event
, ibcq
->cq_context
);
65 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf
*buf
, int n
)
67 int offset
= n
* sizeof (struct mlx4_cqe
);
69 if (buf
->buf
.nbufs
== 1)
70 return buf
->buf
.u
.direct
.buf
+ offset
;
72 return buf
->buf
.u
.page_list
[offset
>> PAGE_SHIFT
].buf
+
73 (offset
& (PAGE_SIZE
- 1));
76 static void *get_cqe(struct mlx4_ib_cq
*cq
, int n
)
78 return get_cqe_from_buf(&cq
->buf
, n
);
81 static void *get_sw_cqe(struct mlx4_ib_cq
*cq
, int n
)
83 struct mlx4_cqe
*cqe
= get_cqe(cq
, n
& cq
->ibcq
.cqe
);
85 return (!!(cqe
->owner_sr_opcode
& MLX4_CQE_OWNER_MASK
) ^
86 !!(n
& (cq
->ibcq
.cqe
+ 1))) ? NULL
: cqe
;
89 static struct mlx4_cqe
*next_cqe_sw(struct mlx4_ib_cq
*cq
)
91 return get_sw_cqe(cq
, cq
->mcq
.cons_index
);
/*
 * Create a completion queue on ibdev.
 *
 * Visible steps:
 *  - reject entries outside [1, dev->dev->caps.max_cqes] with
 *    ERR_PTR(-EINVAL);
 *  - kmalloc() the mlx4_ib_cq (ERR_PTR(-ENOMEM) is returned on the failure
 *    path);
 *  - round entries + 1 up to a power of two, store entries - 1 in
 *    cq->ibcq.cqe, compute buf_size, initialise cq->lock;
 *  - one path copies a struct mlx4_ib_create_cq from udata, pins the
 *    buffer at ucmd.buf_addr with ib_umem_get(), initialises and writes the
 *    MTT from the umem, maps the doorbell at ucmd.db_addr and uses the user
 *    context's UAR;
 *  - the other path allocates a doorbell record with mlx4_ib_db_alloc(),
 *    points set_ci_db / arm_db at it, allocates the CQE buffer with
 *    mlx4_buf_alloc(), initialises and writes the MTT, and uses
 *    dev->priv_uar;
 *  - mlx4_cq_alloc() creates the hardware CQ, after which the comp and
 *    event callbacks are installed and the CQ number is copied to udata;
 *  - the trailing calls (db unmap, MTT cleanup, umem release / buffer free,
 *    db free) are the resource teardown used on failure.
 *
 * NOTE(review): this listing is missing lines -- braces, the conditions
 * selecting the two paths (presumably "context" non-NULL for userspace),
 * the err checks/gotos, the error labels and the final returns.  Confirm
 * control flow against the complete source before relying on this summary.
 */
94 struct ib_cq
*mlx4_ib_create_cq(struct ib_device
*ibdev
, int entries
, int vector
,
95 struct ib_ucontext
*context
,
96 struct ib_udata
*udata
)
98 struct mlx4_ib_dev
*dev
= to_mdev(ibdev
);
99 struct mlx4_ib_cq
*cq
;
100 struct mlx4_uar
*uar
;
104 if (entries
< 1 || entries
> dev
->dev
->caps
.max_cqes
)
105 return ERR_PTR(-EINVAL
);
107 cq
= kmalloc(sizeof *cq
, GFP_KERNEL
);
109 return ERR_PTR(-ENOMEM
);
111 entries
= roundup_pow_of_two(entries
+ 1);
112 cq
->ibcq
.cqe
= entries
- 1;
113 buf_size
= entries
* sizeof (struct mlx4_cqe
);
114 spin_lock_init(&cq
->lock
);
117 struct mlx4_ib_create_cq ucmd
;
119 if (ib_copy_from_udata(&ucmd
, udata
, sizeof ucmd
)) {
124 cq
->umem
= ib_umem_get(context
, ucmd
.buf_addr
, buf_size
,
125 IB_ACCESS_LOCAL_WRITE
);
126 if (IS_ERR(cq
->umem
)) {
127 err
= PTR_ERR(cq
->umem
);
131 err
= mlx4_mtt_init(dev
->dev
, ib_umem_page_count(cq
->umem
),
132 ilog2(cq
->umem
->page_size
), &cq
->buf
.mtt
);
136 err
= mlx4_ib_umem_write_mtt(dev
, &cq
->buf
.mtt
, cq
->umem
);
140 err
= mlx4_ib_db_map_user(to_mucontext(context
), ucmd
.db_addr
,
145 uar
= &to_mucontext(context
)->uar
;
147 err
= mlx4_ib_db_alloc(dev
, &cq
->db
, 1);
151 cq
->mcq
.set_ci_db
= cq
->db
.db
;
152 cq
->mcq
.arm_db
= cq
->db
.db
+ 1;
153 *cq
->mcq
.set_ci_db
= 0;
156 if (mlx4_buf_alloc(dev
->dev
, buf_size
, PAGE_SIZE
* 2, &cq
->buf
.buf
)) {
161 err
= mlx4_mtt_init(dev
->dev
, cq
->buf
.buf
.npages
, cq
->buf
.buf
.page_shift
,
166 err
= mlx4_buf_write_mtt(dev
->dev
, &cq
->buf
.mtt
, &cq
->buf
.buf
);
170 uar
= &dev
->priv_uar
;
173 err
= mlx4_cq_alloc(dev
->dev
, entries
, &cq
->buf
.mtt
, uar
,
174 cq
->db
.dma
, &cq
->mcq
);
178 cq
->mcq
.comp
= mlx4_ib_cq_comp
;
179 cq
->mcq
.event
= mlx4_ib_cq_event
;
182 if (ib_copy_to_udata(udata
, &cq
->mcq
.cqn
, sizeof (__u32
))) {
191 mlx4_ib_db_unmap_user(to_mucontext(context
), &cq
->db
);
194 mlx4_mtt_cleanup(dev
->dev
, &cq
->buf
.mtt
);
198 ib_umem_release(cq
->umem
);
200 mlx4_buf_free(dev
->dev
, entries
* sizeof (struct mlx4_cqe
),
205 mlx4_ib_db_free(dev
, &cq
->db
);
/*
 * Destroy a completion queue.
 *
 * Frees the hardware CQ with mlx4_cq_free() and releases its MTT, then
 * tears down the backing resources: doorbell unmap plus ib_umem_release()
 * on one path, mlx4_buf_free() of (cq->cqe + 1) CQEs plus mlx4_ib_db_free()
 * on the other.
 *
 * NOTE(review): this listing lacks the braces, the condition choosing
 * between the two teardown paths (presumably whether cq->uobject is set),
 * the last argument of mlx4_buf_free(), the release of mcq itself and the
 * return statement -- confirm against the complete source.
 */
213 int mlx4_ib_destroy_cq(struct ib_cq
*cq
)
215 struct mlx4_ib_dev
*dev
= to_mdev(cq
->device
);
216 struct mlx4_ib_cq
*mcq
= to_mcq(cq
);
218 mlx4_cq_free(dev
->dev
, &mcq
->mcq
);
219 mlx4_mtt_cleanup(dev
->dev
, &mcq
->buf
.mtt
);
222 mlx4_ib_db_unmap_user(to_mucontext(cq
->uobject
->context
), &mcq
->db
);
223 ib_umem_release(mcq
->umem
);
225 mlx4_buf_free(dev
->dev
, (cq
->cqe
+ 1) * sizeof (struct mlx4_cqe
),
227 mlx4_ib_db_free(dev
, &mcq
->db
);
/*
 * Debug helper: print the first eight 32-bit words of a CQE at KERN_DEBUG,
 * converting each from big-endian with be32_to_cpu().
 *
 * NOTE(review): the declaration of "buf" is not present in this listing;
 * presumably it is a big-endian 32-bit pointer aliasing "cqe" -- confirm
 * against the complete source.
 */
235 static void dump_cqe(void *cqe
)
239 printk(KERN_DEBUG
"CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
240 be32_to_cpu(buf
[0]), be32_to_cpu(buf
[1]), be32_to_cpu(buf
[2]),
241 be32_to_cpu(buf
[3]), be32_to_cpu(buf
[4]), be32_to_cpu(buf
[5]),
242 be32_to_cpu(buf
[6]), be32_to_cpu(buf
[7]));
/*
 * Translate an mlx4 error CQE into an ib_wc status.
 *
 * For MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR a KERN_DEBUG line is printed with
 * the QPN, WQE index, vendor syndrome and the opcode bits of
 * owner_sr_opcode.  The switch then maps each MLX4_CQE_SYNDROME_* value to
 * the corresponding IB_WC_*_ERR status, with IB_WC_GENERAL_ERR as the
 * remaining assignment, and finally copies vendor_err_syndrome into
 * wc->vendor_err.
 *
 * NOTE(review): this listing lacks the second parameter (presumably
 * "struct ib_wc *wc"), the tail of the debug format string, the break
 * statements, the default label and all braces -- confirm against the
 * complete source.
 */
245 static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe
*cqe
,
248 if (cqe
->syndrome
== MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR
) {
249 printk(KERN_DEBUG
"local QP operation err "
250 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
252 be32_to_cpu(cqe
->my_qpn
), be16_to_cpu(cqe
->wqe_index
),
253 cqe
->vendor_err_syndrome
,
254 cqe
->owner_sr_opcode
& ~MLX4_CQE_OWNER_MASK
);
258 switch (cqe
->syndrome
) {
259 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR
:
260 wc
->status
= IB_WC_LOC_LEN_ERR
;
262 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR
:
263 wc
->status
= IB_WC_LOC_QP_OP_ERR
;
265 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR
:
266 wc
->status
= IB_WC_LOC_PROT_ERR
;
268 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR
:
269 wc
->status
= IB_WC_WR_FLUSH_ERR
;
271 case MLX4_CQE_SYNDROME_MW_BIND_ERR
:
272 wc
->status
= IB_WC_MW_BIND_ERR
;
274 case MLX4_CQE_SYNDROME_BAD_RESP_ERR
:
275 wc
->status
= IB_WC_BAD_RESP_ERR
;
277 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR
:
278 wc
->status
= IB_WC_LOC_ACCESS_ERR
;
280 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR
:
281 wc
->status
= IB_WC_REM_INV_REQ_ERR
;
283 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR
:
284 wc
->status
= IB_WC_REM_ACCESS_ERR
;
286 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR
:
287 wc
->status
= IB_WC_REM_OP_ERR
;
289 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR
:
290 wc
->status
= IB_WC_RETRY_EXC_ERR
;
292 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR
:
293 wc
->status
= IB_WC_RNR_RETRY_EXC_ERR
;
295 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR
:
296 wc
->status
= IB_WC_REM_ABORT_ERR
;
299 wc
->status
= IB_WC_GENERAL_ERR
;
303 wc
->vendor_err
= cqe
->vendor_err_syndrome
;
/*
 * Consume one CQE from cq and translate it into a work completion.
 *
 * Visible steps:
 *  - fetch the entry at the consumer index with next_cqe_sw() and advance
 *    cq->mcq.cons_index;
 *  - derive is_send and is_error from owner_sr_opcode;
 *  - when the CQE's QPN (low 24 bits of my_qpn) differs from (*cur_qp), look
 *    the QP up with __mlx4_qp_lookup(), warn about an unknown QPN, and cache
 *    the result in *cur_qp so consecutive CQEs for one QP skip the lookup;
 *  - set wc->qp and pick wc->wr_id from one of three sources: a work queue
 *    whose tail is first advanced to the CQE's wqe_index, the SRQ's wrid[]
 *    (the SRQ WQE is then freed), or a work queue's wrid[] at its current
 *    tail;
 *  - on an error CQE delegate to mlx4_ib_handle_error_cqe(); otherwise set
 *    IB_WC_SUCCESS and decode the opcode -- the first switch uses
 *    MLX4_OPCODE_* values, the second uses MLX4_RECV_OPCODE_* values and is
 *    followed by the slid, sl, src_qp, dlid_path_bits, GRH flag and
 *    pkey_index fields.
 *
 * NOTE(review): this listing is missing many lines -- the third parameter
 * (presumably "struct ib_wc *wc"), several local declarations, the NULL
 * check after next_cqe_sw(), the memory barrier that the fragment at
 * original line 325 describes, the conditions selecting the send / SRQ /
 * receive branches, break statements and return values.  Confirm against
 * the complete source before relying on this summary.
 */
306 static int mlx4_ib_poll_one(struct mlx4_ib_cq
*cq
,
307 struct mlx4_ib_qp
**cur_qp
,
310 struct mlx4_cqe
*cqe
;
312 struct mlx4_ib_wq
*wq
;
313 struct mlx4_ib_srq
*srq
;
318 cqe
= next_cqe_sw(cq
);
322 ++cq
->mcq
.cons_index
;
325 * Make sure we read CQ entry contents after we've checked the
330 is_send
= cqe
->owner_sr_opcode
& MLX4_CQE_IS_SEND_MASK
;
331 is_error
= (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) ==
332 MLX4_CQE_OPCODE_ERROR
;
335 (be32_to_cpu(cqe
->my_qpn
) & 0xffffff) != (*cur_qp
)->mqp
.qpn
) {
337 * We do not have to take the QP table lock here,
338 * because CQs will be locked while QPs are removed
341 mqp
= __mlx4_qp_lookup(to_mdev(cq
->ibcq
.device
)->dev
,
342 be32_to_cpu(cqe
->my_qpn
));
343 if (unlikely(!mqp
)) {
344 printk(KERN_WARNING
"CQ %06x with entry for unknown QPN %06x\n",
345 cq
->mcq
.cqn
, be32_to_cpu(cqe
->my_qpn
) & 0xffffff);
349 *cur_qp
= to_mibqp(mqp
);
352 wc
->qp
= &(*cur_qp
)->ibqp
;
356 wqe_ctr
= be16_to_cpu(cqe
->wqe_index
);
357 wq
->tail
+= (u16
) (wqe_ctr
- (u16
) wq
->tail
);
358 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
360 } else if ((*cur_qp
)->ibqp
.srq
) {
361 srq
= to_msrq((*cur_qp
)->ibqp
.srq
);
362 wqe_ctr
= be16_to_cpu(cqe
->wqe_index
);
363 wc
->wr_id
= srq
->wrid
[wqe_ctr
];
364 mlx4_ib_free_srq_wqe(srq
, wqe_ctr
);
367 wc
->wr_id
= wq
->wrid
[wq
->tail
& (wq
->wqe_cnt
- 1)];
371 if (unlikely(is_error
)) {
372 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe
*) cqe
, wc
);
376 wc
->status
= IB_WC_SUCCESS
;
380 switch (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) {
381 case MLX4_OPCODE_RDMA_WRITE_IMM
:
382 wc
->wc_flags
|= IB_WC_WITH_IMM
;
383 case MLX4_OPCODE_RDMA_WRITE
:
384 wc
->opcode
= IB_WC_RDMA_WRITE
;
386 case MLX4_OPCODE_SEND_IMM
:
387 wc
->wc_flags
|= IB_WC_WITH_IMM
;
388 case MLX4_OPCODE_SEND
:
389 wc
->opcode
= IB_WC_SEND
;
391 case MLX4_OPCODE_RDMA_READ
:
392 wc
->opcode
= IB_WC_RDMA_READ
;
393 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
395 case MLX4_OPCODE_ATOMIC_CS
:
396 wc
->opcode
= IB_WC_COMP_SWAP
;
399 case MLX4_OPCODE_ATOMIC_FA
:
400 wc
->opcode
= IB_WC_FETCH_ADD
;
403 case MLX4_OPCODE_BIND_MW
:
404 wc
->opcode
= IB_WC_BIND_MW
;
408 wc
->byte_len
= be32_to_cpu(cqe
->byte_cnt
);
410 switch (cqe
->owner_sr_opcode
& MLX4_CQE_OPCODE_MASK
) {
411 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM
:
412 wc
->opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
413 wc
->wc_flags
= IB_WC_WITH_IMM
;
414 wc
->imm_data
= cqe
->immed_rss_invalid
;
416 case MLX4_RECV_OPCODE_SEND
:
417 wc
->opcode
= IB_WC_RECV
;
420 case MLX4_RECV_OPCODE_SEND_IMM
:
421 wc
->opcode
= IB_WC_RECV
;
422 wc
->wc_flags
= IB_WC_WITH_IMM
;
423 wc
->imm_data
= cqe
->immed_rss_invalid
;
427 wc
->slid
= be16_to_cpu(cqe
->rlid
);
428 wc
->sl
= cqe
->sl
>> 4;
429 wc
->src_qp
= be32_to_cpu(cqe
->g_mlpath_rqpn
) & 0xffffff;
430 wc
->dlid_path_bits
= (be32_to_cpu(cqe
->g_mlpath_rqpn
) >> 24) & 0x7f;
431 wc
->wc_flags
|= be32_to_cpu(cqe
->g_mlpath_rqpn
) & 0x80000000 ?
433 wc
->pkey_index
= be32_to_cpu(cqe
->immed_rss_invalid
) >> 16;
/*
 * Poll up to num_entries completions from ibcq into the wc array.
 *
 * With cq->lock held (interrupts saved and disabled) the loop calls
 * mlx4_ib_poll_one() once per slot, writing into wc + npolled and sharing
 * the cur_qp cache across iterations.  The consumer index is then published
 * to hardware with mlx4_cq_set_ci() before the lock is dropped.  The final
 * test treats err == 0 and err == -EAGAIN alike.
 *
 * NOTE(review): this listing lacks the declarations of flags, npolled and
 * err, the loop's exit on error, and the return statements (presumably
 * npolled for 0 / -EAGAIN and err otherwise) -- confirm against the
 * complete source.
 */
439 int mlx4_ib_poll_cq(struct ib_cq
*ibcq
, int num_entries
, struct ib_wc
*wc
)
441 struct mlx4_ib_cq
*cq
= to_mcq(ibcq
);
442 struct mlx4_ib_qp
*cur_qp
= NULL
;
447 spin_lock_irqsave(&cq
->lock
, flags
);
449 for (npolled
= 0; npolled
< num_entries
; ++npolled
) {
450 err
= mlx4_ib_poll_one(cq
, &cur_qp
, wc
+ npolled
);
456 mlx4_cq_set_ci(&cq
->mcq
);
458 spin_unlock_irqrestore(&cq
->lock
, flags
);
460 if (err
== 0 || err
== -EAGAIN
)
/*
 * Request a completion notification on ibcq.
 *
 * Calls mlx4_cq_arm() with MLX4_CQ_DB_REQ_NOT_SOL when the
 * IB_CQ_SOLICITED_MASK bits of flags equal IB_CQ_SOLICITED, and with
 * MLX4_CQ_DB_REQ_NOT otherwise, passing the device's uar_map and the
 * doorbell lock obtained through MLX4_GET_DOORBELL_LOCK().
 *
 * NOTE(review): the braces and the return statement are not present in
 * this listing (presumably it returns 0) -- confirm against the complete
 * source.
 */
466 int mlx4_ib_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags flags
)
468 mlx4_cq_arm(&to_mcq(ibcq
)->mcq
,
469 (flags
& IB_CQ_SOLICITED_MASK
) == IB_CQ_SOLICITED
?
470 MLX4_CQ_DB_REQ_NOT_SOL
: MLX4_CQ_DB_REQ_NOT
,
471 to_mdev(ibcq
->device
)->uar_map
,
472 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq
->device
)->uar_lock
));
/*
 * Remove every CQE belonging to QP number qpn from cq.  No lock is taken
 * here; mlx4_ib_cq_clean() is the wrapper that holds cq->lock around this
 * call.
 *
 * Phase 1 walks forward from the consumer index with get_sw_cqe() to find
 * the current producer index, with a test against cons_index +
 * cq->ibcq.cqe.  Phase 2 walks backwards from there to the consumer index:
 * an entry whose QPN (low 24 bits of my_qpn) matches is dropped, and if srq
 * is given and the entry is not a send completion its SRQ WQE is released
 * with mlx4_ib_free_srq_wqe().  The memcpy() path copies an entry forward
 * by nfreed slots while preserving the destination slot's owner bit.
 * Afterwards cons_index is advanced by nfreed and written to hardware with
 * mlx4_cq_set_ci().
 *
 * NOTE(review): this listing lacks the declarations of prod_index, nfreed
 * and owner_bit, the loop-exit statement of phase 1, the nfreed increment,
 * the condition guarding the copy, the condition guarding the consumer
 * index update, the barrier that the fragment at original lines 517-518
 * describes, and all comment delimiters and closing braces.  Confirm
 * against the complete source.
 */
477 void __mlx4_ib_cq_clean(struct mlx4_ib_cq
*cq
, u32 qpn
, struct mlx4_ib_srq
*srq
)
481 struct mlx4_cqe
*cqe
, *dest
;
485 * First we need to find the current producer index, so we
486 * know where to start cleaning from. It doesn't matter if HW
487 * adds new entries after this loop -- the QP we're worried
488 * about is already in RESET, so the new entries won't come
489 * from our QP and therefore don't need to be checked.
491 for (prod_index
= cq
->mcq
.cons_index
; get_sw_cqe(cq
, prod_index
); ++prod_index
)
492 if (prod_index
== cq
->mcq
.cons_index
+ cq
->ibcq
.cqe
)
496 * Now sweep backwards through the CQ, removing CQ entries
497 * that match our QP by copying older entries on top of them.
499 while ((int) --prod_index
- (int) cq
->mcq
.cons_index
>= 0) {
500 cqe
= get_cqe(cq
, prod_index
& cq
->ibcq
.cqe
);
501 if ((be32_to_cpu(cqe
->my_qpn
) & 0xffffff) == qpn
) {
502 if (srq
&& !(cqe
->owner_sr_opcode
& MLX4_CQE_IS_SEND_MASK
))
503 mlx4_ib_free_srq_wqe(srq
, be16_to_cpu(cqe
->wqe_index
));
506 dest
= get_cqe(cq
, (prod_index
+ nfreed
) & cq
->ibcq
.cqe
);
507 owner_bit
= dest
->owner_sr_opcode
& MLX4_CQE_OWNER_MASK
;
508 memcpy(dest
, cqe
, sizeof *cqe
);
509 dest
->owner_sr_opcode
= owner_bit
|
510 (dest
->owner_sr_opcode
& ~MLX4_CQE_OWNER_MASK
);
515 cq
->mcq
.cons_index
+= nfreed
;
517 * Make sure update of buffer contents is done before
518 * updating consumer index.
521 mlx4_cq_set_ci(&cq
->mcq
);
525 void mlx4_ib_cq_clean(struct mlx4_ib_cq
*cq
, u32 qpn
, struct mlx4_ib_srq
*srq
)
527 spin_lock_irq(&cq
->lock
);
528 __mlx4_ib_cq_clean(cq
, qpn
, srq
);
529 spin_unlock_irq(&cq
->lock
);