2 * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <rdma/uverbs_ioctl.h>
/*
 * destroy_cq() - ask firmware to reset a CQ, then release its host resources.
 *
 * Builds an FW_RI_RES_WR work request in @skb that carries one resource entry
 * of type FW_RI_RES_TYPE_CQ with op FW_RI_RES_OP_RESET for cq->cqid, passes it
 * to c4iw_ref_send_wait() together with @wr_waitp, then frees the coherent DMA
 * queue memory and hands the cqid back through c4iw_put_cqid().
 *
 * @rdev:     device; rdev->lldi.pdev->dev is the device used for the DMA free
 * @cq:       queue being torn down (cqid, memsize, queue and mapping are read)
 * @uctx:     context passed on to c4iw_put_cqid()
 * @skb:      caller-supplied buffer in which the work request is built
 * @wr_waitp: wait object; its address is stored in the request cookie
 *
 * NOTE(review): this copy looks incomplete - wr_len has no visible
 * declaration and res has no visible assignment. Confirm against the
 * canonical source before relying on it.
 */
37 static void destroy_cq(struct c4iw_rdev
*rdev
, struct t4_cq
*cq
,
38 struct c4iw_dev_ucontext
*uctx
, struct sk_buff
*skb
,
39 struct c4iw_wr_wait
*wr_waitp
)
41 struct fw_ri_res_wr
*res_wr
;
42 struct fw_ri_res
*res
;
/* The request is one fw_ri_res_wr header plus one fw_ri_res entry. */
45 wr_len
= sizeof(*res_wr
) + sizeof(*res
);
46 set_wr_txq(skb
, CPL_PRIORITY_CONTROL
, 0);
48 res_wr
= __skb_put_zero(skb
, wr_len
);
49 res_wr
->op_nres
= cpu_to_be32(
50 FW_WR_OP_V(FW_RI_RES_WR
) |
51 FW_RI_RES_WR_NRES_V(1) |
53 res_wr
->len16_pkd
= cpu_to_be32(DIV_ROUND_UP(wr_len
, 16));
54 res_wr
->cookie
= (uintptr_t)wr_waitp
;
/* Resource entry: a CQ, op RESET, identified by cq->cqid. */
56 res
->u
.cq
.restype
= FW_RI_RES_TYPE_CQ
;
57 res
->u
.cq
.op
= FW_RI_RES_OP_RESET
;
58 res
->u
.cq
.iqid
= cpu_to_be32(cq
->cqid
);
60 c4iw_init_wr_wait(wr_waitp
);
61 c4iw_ref_send_wait(rdev
, skb
, wr_waitp
, 0, 0, __func__
);
/* Free the DMA-coherent queue memory and return the cqid. */
64 dma_free_coherent(&(rdev
->lldi
.pdev
->dev
),
65 cq
->memsize
, cq
->queue
,
66 dma_unmap_addr(cq
, mapping
));
67 c4iw_put_cqid(rdev
, cq
->cqid
, uctx
);
/*
 * create_cq() - allocate host resources for a CQ and register it with firmware.
 *
 * Visible steps, in order:
 *  - user is true when @uctx is not the device-wide rdev->uctx; ucontext is
 *    derived from @uctx with container_of().
 *  - a cqid is taken from c4iw_get_cqid(), a software queue of cq->memsize
 *    bytes is kzalloc()ed and a DMA-coherent hardware queue of the same size
 *    is allocated; its bus address is recorded with dma_unmap_addr_set().
 *  - cq->qp_errp is pointed at the qp_err field of the t4_status_page that
 *    sits at entry (cq->size - 1); the two visible assignments differ only in
 *    the entry size used (half of sizeof(*cq->queue) for 32-byte user CQEs).
 *  - an FW_RI_RES_WR with op FW_RI_RES_OP_WRITE is built in a freshly
 *    allocated skb and sent with c4iw_ref_send_wait(); ret receives the result.
 *  - cq->gts and cq->bar2_va are filled in; bar2_pa is requested only for
 *    user CQs and a warning is printed when a user CQ has no bar2_pa.
 *  - the trailing dma_free_coherent() and c4iw_put_cqid() calls look like the
 *    error unwind (TODO confirm - the labels are not visible).
 *
 * @rdev:     device the CQ is created on
 * @cq:       queue to set up; size, memsize and vector are read
 * @uctx:     context the cqid is allocated from
 * @wr_waitp: wait object; its address is stored in the request cookie
 *
 * NOTE(review): this copy looks incomplete - wr_len, skb and ret have no
 * visible declaration, res has no visible assignment, and no branch bodies
 * or return statements are visible. Confirm against the canonical source.
 */
70 static int create_cq(struct c4iw_rdev
*rdev
, struct t4_cq
*cq
,
71 struct c4iw_dev_ucontext
*uctx
,
72 struct c4iw_wr_wait
*wr_waitp
)
74 struct fw_ri_res_wr
*res_wr
;
75 struct fw_ri_res
*res
;
77 int user
= (uctx
!= &rdev
->uctx
);
80 struct c4iw_ucontext
*ucontext
= NULL
;
83 ucontext
= container_of(uctx
, struct c4iw_ucontext
, uctx
);
85 cq
->cqid
= c4iw_get_cqid(rdev
, uctx
);
92 cq
->sw_queue
= kzalloc(cq
->memsize
, GFP_KERNEL
);
98 cq
->queue
= dma_alloc_coherent(&rdev
->lldi
.pdev
->dev
, cq
->memsize
,
99 &cq
->dma_addr
, GFP_KERNEL
);
104 dma_unmap_addr_set(cq
, mapping
, cq
->dma_addr
);
/* Status page lives in the last entry; entry size is halved for 32B CQEs. */
106 if (user
&& ucontext
->is_32b_cqe
) {
107 cq
->qp_errp
= &((struct t4_status_page
*)
108 ((u8
*)cq
->queue
+ (cq
->size
- 1) *
109 (sizeof(*cq
->queue
) / 2)))->qp_err
;
111 cq
->qp_errp
= &((struct t4_status_page
*)
112 ((u8
*)cq
->queue
+ (cq
->size
- 1) *
113 sizeof(*cq
->queue
)))->qp_err
;
116 /* build fw_ri_res_wr */
117 wr_len
= sizeof(*res_wr
) + sizeof(*res
);
119 skb
= alloc_skb(wr_len
, GFP_KERNEL
);
124 set_wr_txq(skb
, CPL_PRIORITY_CONTROL
, 0);
126 res_wr
= __skb_put_zero(skb
, wr_len
);
127 res_wr
->op_nres
= cpu_to_be32(
128 FW_WR_OP_V(FW_RI_RES_WR
) |
129 FW_RI_RES_WR_NRES_V(1) |
131 res_wr
->len16_pkd
= cpu_to_be32(DIV_ROUND_UP(wr_len
, 16));
132 res_wr
->cookie
= (uintptr_t)wr_waitp
;
/* Resource entry: a CQ, op WRITE, identified by cq->cqid. */
134 res
->u
.cq
.restype
= FW_RI_RES_TYPE_CQ
;
135 res
->u
.cq
.op
= FW_RI_RES_OP_WRITE
;
136 res
->u
.cq
.iqid
= cpu_to_be32(cq
->cqid
);
137 res
->u
.cq
.iqandst_to_iqandstindex
= cpu_to_be32(
138 FW_RI_RES_WR_IQANUS_V(0) |
139 FW_RI_RES_WR_IQANUD_V(1) |
140 FW_RI_RES_WR_IQANDST_F
|
141 FW_RI_RES_WR_IQANDSTINDEX_V(
142 rdev
->lldi
.ciq_ids
[cq
->vector
]));
143 res
->u
.cq
.iqdroprss_to_iqesize
= cpu_to_be16(
144 FW_RI_RES_WR_IQDROPRSS_F
|
145 FW_RI_RES_WR_IQPCIECH_V(2) |
146 FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
148 ((user
&& ucontext
->is_32b_cqe
) ?
149 FW_RI_RES_WR_IQESIZE_V(1) :
150 FW_RI_RES_WR_IQESIZE_V(2)));
151 res
->u
.cq
.iqsize
= cpu_to_be16(cq
->size
);
152 res
->u
.cq
.iqaddr
= cpu_to_be64(cq
->dma_addr
);
154 c4iw_init_wr_wait(wr_waitp
);
155 ret
= c4iw_ref_send_wait(rdev
, skb
, wr_waitp
, 0, 0, __func__
);
160 cq
->gts
= rdev
->lldi
.gts_reg
;
/* The BAR2 physical address is only requested for user CQs. */
163 cq
->bar2_va
= c4iw_bar2_addrs(rdev
, cq
->cqid
, CXGB4_BAR2_QTYPE_INGRESS
,
165 user
? &cq
->bar2_pa
: NULL
);
166 if (user
&& !cq
->bar2_pa
) {
167 pr_warn("%s: cqid %u not in BAR2 range\n",
168 pci_name(rdev
->lldi
.pdev
), cq
->cqid
);
174 dma_free_coherent(&rdev
->lldi
.pdev
->dev
, cq
->memsize
, cq
->queue
,
175 dma_unmap_addr(cq
, mapping
));
179 c4iw_put_cqid(rdev
, cq
->cqid
, uctx
);
/*
 * insert_recv_cqe() - place a software-generated flush CQE for a receive.
 *
 * Zeroes a local CQE, gives its header status T4_ERR_SWFLUSH, opcode
 * FW_RI_SEND and the QP id wq->sq.qid, sets the generation bit from cq->gen,
 * stores @srqidx (big-endian) in u.srcqe.abs_rqe_idx and copies the CQE into
 * cq->sw_queue[cq->sw_pidx].
 *
 * @wq:     work queue the CQE is attributed to
 * @cq:     completion queue whose software queue receives the CQE
 * @srqidx: SRQ index recorded in the CQE
 *
 * NOTE(review): the embedded numbering jumps (192 to 195, 196 to 198, and
 * past 199), so header terms, any guard on the srqidx store and the producer
 * index update are not visible here. cqe itself has no visible declaration.
 */
184 static void insert_recv_cqe(struct t4_wq
*wq
, struct t4_cq
*cq
, u32 srqidx
)
188 pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
189 wq
, cq
, cq
->sw_cidx
, cq
->sw_pidx
);
190 memset(&cqe
, 0, sizeof(cqe
));
191 cqe
.header
= cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH
) |
192 CQE_OPCODE_V(FW_RI_SEND
) |
195 CQE_QPID_V(wq
->sq
.qid
));
196 cqe
.bits_type_ts
= cpu_to_be64(CQE_GENBIT_V((u64
)cq
->gen
));
198 cqe
.u
.srcqe
.abs_rqe_idx
= cpu_to_be32(srqidx
);
199 cq
->sw_queue
[cq
->sw_pidx
] = cqe
;
/*
 * c4iw_flush_rq() - generate flush CQEs for receive work requests.
 *
 * Computes in_use as wq->rq.in_use minus @count, logs both values and calls
 * insert_recv_cqe(wq, cq, 0).
 *
 * @wq:    work queue whose receive queue is being flushed
 * @cq:    completion queue that receives the software CQEs
 * @count: number of in-use receive entries to skip
 *
 * NOTE(review): the loop around insert_recv_cqe() and the return statement
 * are not visible in this copy; presumably one CQE is inserted per remaining
 * in-use entry and the number inserted is returned - confirm.
 */
203 int c4iw_flush_rq(struct t4_wq
*wq
, struct t4_cq
*cq
, int count
)
206 int in_use
= wq
->rq
.in_use
- count
;
208 pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
209 wq
, cq
, wq
->rq
.in_use
, count
);
211 insert_recv_cqe(wq
, cq
, 0);
/*
 * insert_sq_cqe() - place a software-generated flush CQE for a send WR.
 *
 * Zeroes a local CQE, gives its header status T4_ERR_SWFLUSH, the opcode of
 * @swcqe and the QP id wq->sq.qid, records swcqe->idx through
 * CQE_WRID_SQ_IDX(), sets the generation bit from cq->gen and copies the CQE
 * into cq->sw_queue[cq->sw_pidx].
 *
 * @wq:    work queue the CQE is attributed to
 * @cq:    completion queue whose software queue receives the CQE
 * @swcqe: software SQ entry being flushed (opcode and idx are read)
 *
 * NOTE(review): the embedded numbering jumps from 226 to 229 and stops at
 * 232, so further header terms and the producer index update are not visible
 * here. cqe itself has no visible declaration.
 */
217 static void insert_sq_cqe(struct t4_wq
*wq
, struct t4_cq
*cq
,
218 struct t4_swsqe
*swcqe
)
222 pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
223 wq
, cq
, cq
->sw_cidx
, cq
->sw_pidx
);
224 memset(&cqe
, 0, sizeof(cqe
));
225 cqe
.header
= cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH
) |
226 CQE_OPCODE_V(swcqe
->opcode
) |
229 CQE_QPID_V(wq
->sq
.qid
));
230 CQE_WRID_SQ_IDX(&cqe
) = swcqe
->idx
;
231 cqe
.bits_type_ts
= cpu_to_be64(CQE_GENBIT_V((u64
)cq
->gen
));
232 cq
->sw_queue
[cq
->sw_pidx
] = cqe
;
/* Forward declaration: c4iw_flush_sq() below calls it before its definition. */
236 static void advance_oldest_read(struct t4_wq
*wq
);
/*
 * c4iw_flush_sq() - generate flush CQEs for the outstanding send WRs of a QP.
 *
 * Works on qhp->wq and on the CQ behind qhp->ibqp.send_cq. Starting at
 * sq.flush_cidx (initialised from sq.cidx when it is still -1), walks the
 * software SQ up to sq.pidx; every entry gets a CQE via insert_sq_cqe(), and
 * when the entry is the current sq.oldest_read, advance_oldest_read() is
 * called. Afterwards sq.flush_cidx is advanced by flushed and reduced by
 * sq.size when it reaches or exceeds it.
 *
 * @qhp: queue pair whose send queue is flushed
 *
 * NOTE(review): idx and flushed have no visible declaration, and the wrap
 * assignment after the ++idx test, the flushed increment and the return
 * statement are not visible in this copy.
 */
238 int c4iw_flush_sq(struct c4iw_qp
*qhp
)
241 struct t4_wq
*wq
= &qhp
->wq
;
242 struct c4iw_cq
*chp
= to_c4iw_cq(qhp
->ibqp
.send_cq
);
243 struct t4_cq
*cq
= &chp
->cq
;
245 struct t4_swsqe
*swsqe
;
/* -1 marks a flush cursor that has not been positioned yet. */
247 if (wq
->sq
.flush_cidx
== -1)
248 wq
->sq
.flush_cidx
= wq
->sq
.cidx
;
249 idx
= wq
->sq
.flush_cidx
;
250 while (idx
!= wq
->sq
.pidx
) {
251 swsqe
= &wq
->sq
.sw_sq
[idx
];
253 insert_sq_cqe(wq
, cq
, swsqe
);
254 if (wq
->sq
.oldest_read
== swsqe
) {
255 advance_oldest_read(wq
);
258 if (++idx
== wq
->sq
.size
)
/* Move the flush cursor forward, wrapping at the queue size. */
261 wq
->sq
.flush_cidx
+= flushed
;
262 if (wq
->sq
.flush_cidx
>= wq
->sq
.size
)
263 wq
->sq
.flush_cidx
-= wq
->sq
.size
;
/*
 * flush_completed_wrs() - move already-completed SQ entries into the SW CQ.
 *
 * Starting at sq.flush_cidx (initialised from sq.cidx when it is still -1),
 * walks the software SQ towards sq.pidx. Unsignaled entries are stepped over.
 * For a signaled entry that is marked complete, the SWCQE bit is set in the
 * CQE saved in the entry, the CQE is copied to cq->sw_queue[cq->sw_pidx], and
 * the new position is stored back in sq.flush_cidx.
 *
 * @wq: work queue whose software SQ is scanned
 * @cq: completion queue whose software queue receives the CQEs
 *
 * NOTE(review): cidx has no visible declaration, and the wrap assignments
 * after the ++cidx tests, the producer index update and whatever ends the
 * loop for a signaled but incomplete entry are not visible in this copy.
 */
267 static void flush_completed_wrs(struct t4_wq
*wq
, struct t4_cq
*cq
)
269 struct t4_swsqe
*swsqe
;
272 if (wq
->sq
.flush_cidx
== -1)
273 wq
->sq
.flush_cidx
= wq
->sq
.cidx
;
274 cidx
= wq
->sq
.flush_cidx
;
276 while (cidx
!= wq
->sq
.pidx
) {
277 swsqe
= &wq
->sq
.sw_sq
[cidx
];
278 if (!swsqe
->signaled
) {
279 if (++cidx
== wq
->sq
.size
)
281 } else if (swsqe
->complete
) {
284 * Insert this completed cqe into the swcq.
286 pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
288 swsqe
->cqe
.header
|= htonl(CQE_SWCQE_V(1));
289 cq
->sw_queue
[cq
->sw_pidx
] = swsqe
->cqe
;
292 if (++cidx
== wq
->sq
.size
)
294 wq
->sq
.flush_cidx
= cidx
;
/*
 * create_read_req_cqe() - synthesise a read-request CQE from a read response.
 *
 * Fills @read_cqe from the oldest outstanding read on @wq and from @hw_cqe:
 *  - u.scqe.cidx   = sq.oldest_read->idx
 *  - len           = sq.oldest_read->read_len, converted with htonl()
 *  - header        = QP id and SWCQE bit taken from @hw_cqe, opcode
 *                    FW_RI_READ_REQ (the expression continues past the last
 *                    visible term; the remainder is not shown in this copy)
 *  - bits_type_ts  = copied unchanged from @hw_cqe
 *
 * @wq:       work queue; sq.oldest_read is dereferenced, so it must be set
 * @hw_cqe:   the CQE that was received
 * @read_cqe: output CQE, written in place
 */
300 static void create_read_req_cqe(struct t4_wq
*wq
, struct t4_cqe
*hw_cqe
,
301 struct t4_cqe
*read_cqe
)
303 read_cqe
->u
.scqe
.cidx
= wq
->sq
.oldest_read
->idx
;
304 read_cqe
->len
= htonl(wq
->sq
.oldest_read
->read_len
);
305 read_cqe
->header
= htonl(CQE_QPID_V(CQE_QPID(hw_cqe
)) |
306 CQE_SWCQE_V(SW_CQE(hw_cqe
)) |
307 CQE_OPCODE_V(FW_RI_READ_REQ
) |
309 read_cqe
->bits_type_ts
= hw_cqe
->bits_type_ts
;
/*
 * advance_oldest_read() - move sq.oldest_read to the next outstanding read.
 *
 * rptr starts at the software SQ index just after the current
 * sq.oldest_read (pointer difference against sq.sw_sq, plus one) and is
 * compared against sq.size for wrap-around. The SQ is then scanned up to
 * sq.pidx; each visited entry is stored in sq.oldest_read and tested for
 * opcode FW_RI_READ_REQ. The last visible statement sets sq.oldest_read to
 * NULL, which is presumably reached only when no read request is found -
 * confirm, since the early exit is not visible here.
 *
 * @wq: work queue whose sq.oldest_read is updated
 *
 * NOTE(review): the bodies of the three if statements (wrap assignments and
 * the exit on a match) are not visible in this copy.
 */
312 static void advance_oldest_read(struct t4_wq
*wq
)
315 u32 rptr
= wq
->sq
.oldest_read
- wq
->sq
.sw_sq
+ 1;
317 if (rptr
== wq
->sq
.size
)
319 while (rptr
!= wq
->sq
.pidx
) {
320 wq
->sq
.oldest_read
= &wq
->sq
.sw_sq
[rptr
];
322 if (wq
->sq
.oldest_read
->opcode
== FW_RI_READ_REQ
)
324 if (++rptr
== wq
->sq
.size
)
327 wq
->sq
.oldest_read
= NULL
;
331 * Move all CQEs from the HWCQ into the SWCQ.
332 * Deal with out-of-order and/or completions that complete
333 * prior unsignalled WRs.
/*
 * c4iw_flush_hw_cq() - drain the hardware CQ into the software CQ.
 *
 * Repeatedly fetches the next hardware CQE with t4_next_hw_cqe() and looks up
 * its QP with get_qhp(). For a QP other than @flush_qhp the QP lock is taken
 * for the duration of the entry and dropped again at the bottom of the loop.
 * Visible per-CQE handling:
 *  - tests on qhp->wq.flushed == 1 and on opcode FW_RI_TERMINATE (their
 *    outcome is not visible; presumably the CQE is skipped - confirm);
 *  - FW_RI_READ_RESP: tests on CQE_TYPE == 1 and CQE_WRID_STAG == 1, then
 *    unsignaled reads are eaten with advance_oldest_read(); otherwise a
 *    read-request CQE is built in the local read_cqe with
 *    create_read_req_cqe() and oldest_read is advanced;
 *  - SQ-type CQEs are saved in sw_sq[CQE_WRID_SQ_IDX].cqe and
 *    flush_completed_wrs() is called;
 *  - a software queue slot at sw_pidx gets the SWCQE bit set and
 *    t4_swcq_produce() is called;
 *  - t4_hwcq_consume() retires the hardware entry.
 *
 * @chp:       completion queue being flushed
 * @flush_qhp: QP on whose behalf the flush runs; its lock is not taken here
 *
 * NOTE(review): qhp and ret have no visible declaration and no loop header,
 * branch body or goto target is visible in this copy. Several comment
 * openers below have lost their terminators, so no comments are added inside
 * the body.
 */
335 void c4iw_flush_hw_cq(struct c4iw_cq
*chp
, struct c4iw_qp
*flush_qhp
)
337 struct t4_cqe
*hw_cqe
, *swcqe
, read_cqe
;
339 struct t4_swsqe
*swsqe
;
342 pr_debug("cqid 0x%x\n", chp
->cq
.cqid
);
343 ret
= t4_next_hw_cqe(&chp
->cq
, &hw_cqe
);
346 * This logic is similar to poll_cq(), but not quite the same
347 * unfortunately. Need to move pertinent HW CQEs to the SW CQ but
348 * also do any translation magic that poll_cq() normally does.
351 qhp
= get_qhp(chp
->rhp
, CQE_QPID(hw_cqe
));
354 * drop CQEs with no associated QP
359 if (flush_qhp
!= qhp
) {
360 spin_lock(&qhp
->lock
);
362 if (qhp
->wq
.flushed
== 1)
366 if (CQE_OPCODE(hw_cqe
) == FW_RI_TERMINATE
)
369 if (CQE_OPCODE(hw_cqe
) == FW_RI_READ_RESP
) {
371 /* If we have reached here because of async
372 * event or other error, and have egress error
375 if (CQE_TYPE(hw_cqe
) == 1)
378 /* drop peer2peer RTR reads.
380 if (CQE_WRID_STAG(hw_cqe
) == 1)
384 * Eat completions for unsignaled read WRs.
386 if (!qhp
->wq
.sq
.oldest_read
->signaled
) {
387 advance_oldest_read(&qhp
->wq
);
392 * Don't write to the HWCQ, create a new read req CQE
393 * in local memory and move it into the swcq.
395 create_read_req_cqe(&qhp
->wq
, hw_cqe
, &read_cqe
);
397 advance_oldest_read(&qhp
->wq
);
400 /* if its a SQ completion, then do the magic to move all the
401 * unsignaled and now in-order completions into the swcq.
403 if (SQ_TYPE(hw_cqe
)) {
404 swsqe
= &qhp
->wq
.sq
.sw_sq
[CQE_WRID_SQ_IDX(hw_cqe
)];
405 swsqe
->cqe
= *hw_cqe
;
407 flush_completed_wrs(&qhp
->wq
, &chp
->cq
);
409 swcqe
= &chp
->cq
.sw_queue
[chp
->cq
.sw_pidx
];
411 swcqe
->header
|= cpu_to_be32(CQE_SWCQE_V(1));
412 t4_swcq_produce(&chp
->cq
);
415 t4_hwcq_consume(&chp
->cq
);
416 ret
= t4_next_hw_cqe(&chp
->cq
, &hw_cqe
);
417 if (qhp
&& flush_qhp
!= qhp
)
418 spin_unlock(&qhp
->lock
);
// cqe_completes_wr() - classify whether a CQE accounts for a work request.
//
// Visible tests, in order:
//  - DRAIN_CQE(cqe): unexpected here, triggers a one-time warning with the
//    QP id wq->sq.qid;
//  - opcode FW_RI_TERMINATE;
//  - opcode FW_RI_RDMA_WRITE on an RQ-type CQE;
//  - opcode FW_RI_READ_RESP on an SQ-type CQE;
//  - a send opcode on an RQ-type CQE while t4_rq_empty(wq) holds.
//
// @cqe: completion entry to classify
// @wq:  work queue the entry belongs to
//
// NOTE(review): none of the return statements are visible in this copy.
// Presumably each matching test returns 0 and the fall-through returns 1;
// confirm against the canonical source.
422 static int cqe_completes_wr(struct t4_cqe
*cqe
, struct t4_wq
*wq
)
424 if (DRAIN_CQE(cqe
)) {
425 WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq
->sq
.qid
);
429 if (CQE_OPCODE(cqe
) == FW_RI_TERMINATE
)
432 if ((CQE_OPCODE(cqe
) == FW_RI_RDMA_WRITE
) && RQ_TYPE(cqe
))
435 if ((CQE_OPCODE(cqe
) == FW_RI_READ_RESP
) && SQ_TYPE(cqe
))
438 if (CQE_SEND_OPCODE(cqe
) && RQ_TYPE(cqe
) && t4_rq_empty(wq
))
// c4iw_count_rcqes() - count receive CQEs for one QP in the software CQ.
//
// Walks cq->sw_queue from ptr up to cq->sw_pidx, with ptr compared against
// cq->size after each increment for wrap-around. An entry matches when it is
// RQ-type, its opcode is not FW_RI_READ_RESP, its QP id equals wq->sq.qid and
// cqe_completes_wr() accepts it. The result is reported through @count and
// logged before and after the scan.
//
// @cq:    completion queue whose software queue is scanned
// @wq:    work queue whose QP id is matched
// @count: output; logged as zero before the scan and as the total after it
//
// NOTE(review): cqe and ptr have no visible declaration, and the reset of
// *count, the increment on a match and the wrap assignment are not visible
// in this copy.
443 void c4iw_count_rcqes(struct t4_cq
*cq
, struct t4_wq
*wq
, int *count
)
449 pr_debug("count zero %d\n", *count
);
451 while (ptr
!= cq
->sw_pidx
) {
452 cqe
= &cq
->sw_queue
[ptr
];
453 if (RQ_TYPE(cqe
) && (CQE_OPCODE(cqe
) != FW_RI_READ_RESP
) &&
454 (CQE_QPID(cqe
) == wq
->sq
.qid
) && cqe_completes_wr(cqe
, wq
))
456 if (++ptr
== cq
->size
)
459 pr_debug("cq %p count %d\n", cq
, *count
);
// post_pending_srq_wrs() - post SRQ work requests that were held back.
//
// While srq->pending_in_use is non-zero, takes the pending entry at
// srq->pending_cidx, records its wr_id in srq->sw_rq[srq->pidx] and marks
// that slot valid, copies the WQE into the SRQ with c4iw_copy_wr_to_srq(),
// consumes the pending entry, advances the SRQ producer with
// t4_srq_produce() and adds the number of T4_EQ_ENTRY_SIZE slots the WQE
// occupies (len16 * 16 bytes, rounded up) to idx. After the loop the doorbell
// is rung with t4_ring_srq_db() and a store to the host_wq_pidx field of the
// status entry at srq->queue[srq->size] begins.
//
// @srq: shared receive queue to post to
//
// NOTE(review): idx has no visible declaration or initialisation, any guard
// around the doorbell is not visible, and the right-hand side of the final
// host_wq_pidx assignment is missing from this copy.
462 static void post_pending_srq_wrs(struct t4_srq
*srq
)
464 struct t4_srq_pending_wr
*pwr
;
467 while (srq
->pending_in_use
) {
468 pwr
= &srq
->pending_wrs
[srq
->pending_cidx
];
469 srq
->sw_rq
[srq
->pidx
].wr_id
= pwr
->wr_id
;
470 srq
->sw_rq
[srq
->pidx
].valid
= 1;
472 pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
474 srq
->cidx
, srq
->pidx
, srq
->wq_pidx
,
475 srq
->in_use
, srq
->size
,
476 (unsigned long long)pwr
->wr_id
);
478 c4iw_copy_wr_to_srq(srq
, &pwr
->wqe
, pwr
->len16
);
479 t4_srq_consume_pending_wr(srq
);
480 t4_srq_produce(srq
, pwr
->len16
);
481 idx
+= DIV_ROUND_UP(pwr
->len16
* 16, T4_EQ_ENTRY_SIZE
);
485 t4_ring_srq_db(srq
, idx
, pwr
->len16
, &pwr
->wqe
);
486 srq
->queue
[srq
->size
].status
.host_wq_pidx
=
// reap_srq_cqe() - retire the SRQ entry that a receive CQE refers to.
//
// rel_idx is the absolute RQE index carried in @hw_cqe minus
// srq->rqt_abs_idx. The software entry at rel_idx is marked invalid and its
// wr_id is read into wr_id.
//  - In-order case (rel_idx == srq->cidx): while srq->ooo_count is non-zero
//    and the entry at srq->cidx is no longer valid, out-of-order entries are
//    consumed with t4_srq_consume_ooo(); once ooo_count is zero and pending
//    work requests exist, post_pending_srq_wrs() is called.
//  - Otherwise (logged as an ooo cqe): t4_srq_produce_ooo() is called.
//
// @hw_cqe: receive CQE that names the SRQ entry
// @srq:    shared receive queue being updated
//
// Return: a u64; presumably the wr_id read above - confirm, the return
// statement is not visible in this copy.
//
// NOTE(review): wr_id has no visible declaration, and the in-order consume
// call and the else keyword separating the two cases are not visible.
491 static u64
reap_srq_cqe(struct t4_cqe
*hw_cqe
, struct t4_srq
*srq
)
493 int rel_idx
= CQE_ABS_RQE_IDX(hw_cqe
) - srq
->rqt_abs_idx
;
496 srq
->sw_rq
[rel_idx
].valid
= 0;
497 wr_id
= srq
->sw_rq
[rel_idx
].wr_id
;
499 if (rel_idx
== srq
->cidx
) {
500 pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
501 __func__
, rel_idx
, srq
->cidx
, srq
->pidx
,
502 srq
->wq_pidx
, srq
->in_use
, srq
->size
,
503 (unsigned long long)srq
->sw_rq
[rel_idx
].wr_id
);
505 while (srq
->ooo_count
&& !srq
->sw_rq
[srq
->cidx
].valid
) {
506 pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
507 __func__
, srq
->cidx
, srq
->pidx
,
508 srq
->wq_pidx
, srq
->in_use
,
509 srq
->size
, srq
->ooo_count
,
511 srq
->sw_rq
[srq
->cidx
].wr_id
);
512 t4_srq_consume_ooo(srq
);
514 if (srq
->ooo_count
== 0 && srq
->pending_in_use
)
515 post_pending_srq_wrs(srq
);
517 pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
518 __func__
, rel_idx
, srq
->cidx
,
519 srq
->pidx
, srq
->wq_pidx
,
520 srq
->in_use
, srq
->size
,
522 (unsigned long long)srq
->sw_rq
[rel_idx
].wr_id
);
523 t4_srq_produce_ooo(srq
);
532 * check the validity of the first CQE,
533 * supply the wq associated with the qpid.
535 * credit: cq credit to return to sge.
536 * cqe_flushed: 1 iff the CQE is flushed.
537 * cqe: copy of the polled CQE.
541 * -EAGAIN CQE skipped, try again.
542 * -EOVERFLOW CQ overflow detected.
// poll_cq() - examine the next CQE and work out what it completes.
//
// Fetches the next CQE with t4_next_cqe() and logs its fields. Visible
// handling, in order:
//  - hardware CQEs are skipped when wq->flushed is set;
//  - FW_RI_TERMINATE CQEs are skipped;
//  - DRAIN CQEs supply the cookie from CQE_DRAIN_COOKIE();
//  - RQ-type FW_RI_READ_RESP: CQE_TYPE == 1 and CQE_WRID_STAG == 1 cases put
//    the wq in error when the status is non-zero; completions for unsignaled
//    reads are eaten with advance_oldest_read(); otherwise a read-request CQE
//    is built in the local read_cqe and oldest_read is advanced;
//  - a non-zero status or a wq already in error sets *cqe_flushed to whether
//    the status is T4_ERR_SWFLUSH and marks the wq in error;
//  - RQ-type: a successful CQE whose MSN differs from wq->rq.msn marks the wq
//    in error and ORs T4_ERR_MSN into the CQE header;
//  - hardware SQ CQEs whose index is not sq.cidx are stored in
//    sw_sq[index].cqe as out-of-order completions;
//  - SQ-type reaping: sq.in_use is reduced by the distance from sq.cidx to
//    the CQE index (with wrap), sq.cidx is set to that index and the cookie
//    is sw_sq[sq.cidx].wr_id;
//  - RQ-type reaping: the cookie is rq.sw_rq[rq.cidx].wr_id, or the value
//    returned by reap_srq_cqe() on the SRQ path;
//  - flush_completed_wrs() then moves now in-order completions to the SW CQ;
//  - the CQE is finally logged as a skipped software or hardware CQE.
//
// @wq:          work queue the CQE belongs to
// @cq:          completion queue being polled
// @cqe:         output copy of the polled CQE
// @cqe_flushed: output flag, see above
// @cookie:      output wr_id or drain cookie
// @credit:      output CQ credit
//
// NOTE(review): the final parameter (used below as srq), ret, every return
// and goto, and the statements that consume the CQE are not visible in this
// copy. Several comment openers below have lost their terminators, so no
// comments are added inside the body.
544 static int poll_cq(struct t4_wq
*wq
, struct t4_cq
*cq
, struct t4_cqe
*cqe
,
545 u8
*cqe_flushed
, u64
*cookie
, u32
*credit
,
549 struct t4_cqe
*hw_cqe
, read_cqe
;
553 ret
= t4_next_cqe(cq
, &hw_cqe
);
557 pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
558 CQE_OVFBIT(hw_cqe
), CQE_QPID(hw_cqe
),
559 CQE_GENBIT(hw_cqe
), CQE_TYPE(hw_cqe
), CQE_STATUS(hw_cqe
),
560 CQE_OPCODE(hw_cqe
), CQE_LEN(hw_cqe
), CQE_WRID_HI(hw_cqe
),
561 CQE_WRID_LOW(hw_cqe
));
564 * skip cqe's not affiliated with a QP.
572 * skip hw cqe's if the wq is flushed.
574 if (wq
->flushed
&& !SW_CQE(hw_cqe
)) {
580 * skip TERMINATE cqes...
582 if (CQE_OPCODE(hw_cqe
) == FW_RI_TERMINATE
) {
588 * Special cqe for drain WR completions...
590 if (DRAIN_CQE(hw_cqe
)) {
591 *cookie
= CQE_DRAIN_COOKIE(hw_cqe
);
597 * Gotta tweak READ completions:
598 * 1) the cqe doesn't contain the sq_wptr from the wr.
599 * 2) opcode not reflected from the wr.
600 * 3) read_len not reflected from the wr.
601 * 4) cq_type is RQ_TYPE not SQ_TYPE.
603 if (RQ_TYPE(hw_cqe
) && (CQE_OPCODE(hw_cqe
) == FW_RI_READ_RESP
)) {
605 /* If we have reached here because of async
606 * event or other error, and have egress error
609 if (CQE_TYPE(hw_cqe
) == 1) {
610 if (CQE_STATUS(hw_cqe
))
611 t4_set_wq_in_error(wq
, 0);
616 /* If this is an unsolicited read response, then the read
617 * was generated by the kernel driver as part of peer-2-peer
618 * connection setup. So ignore the completion.
620 if (CQE_WRID_STAG(hw_cqe
) == 1) {
621 if (CQE_STATUS(hw_cqe
))
622 t4_set_wq_in_error(wq
, 0);
628 * Eat completions for unsignaled read WRs.
630 if (!wq
->sq
.oldest_read
->signaled
) {
631 advance_oldest_read(wq
);
637 * Don't write to the HWCQ, so create a new read req CQE
640 create_read_req_cqe(wq
, hw_cqe
, &read_cqe
);
642 advance_oldest_read(wq
);
645 if (CQE_STATUS(hw_cqe
) || t4_wq_in_error(wq
)) {
646 *cqe_flushed
= (CQE_STATUS(hw_cqe
) == T4_ERR_SWFLUSH
);
647 t4_set_wq_in_error(wq
, 0);
653 if (RQ_TYPE(hw_cqe
)) {
656 * HW only validates 4 bits of MSN. So we must validate that
657 * the MSN in the SEND is the next expected MSN. If its not,
658 * then we complete this with T4_ERR_MSN and mark the wq in
661 if (unlikely(!CQE_STATUS(hw_cqe
) &&
662 CQE_WRID_MSN(hw_cqe
) != wq
->rq
.msn
)) {
663 t4_set_wq_in_error(wq
, 0);
664 hw_cqe
->header
|= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN
));
670 * If we get here its a send completion.
672 * Handle out of order completion. These get stuffed
673 * in the SW SQ. Then the SW SQ is walked to move any
674 * now in-order completions into the SW CQ. This handles
676 * 1) reaping unsignaled WRs when the first subsequent
677 * signaled WR is completed.
678 * 2) out of order read completions.
680 if (!SW_CQE(hw_cqe
) && (CQE_WRID_SQ_IDX(hw_cqe
) != wq
->sq
.cidx
)) {
681 struct t4_swsqe
*swsqe
;
683 pr_debug("out of order completion going in sw_sq at idx %u\n",
684 CQE_WRID_SQ_IDX(hw_cqe
));
685 swsqe
= &wq
->sq
.sw_sq
[CQE_WRID_SQ_IDX(hw_cqe
)];
686 swsqe
->cqe
= *hw_cqe
;
696 * Reap the associated WR(s) that are freed up with this
699 if (SQ_TYPE(hw_cqe
)) {
700 int idx
= CQE_WRID_SQ_IDX(hw_cqe
);
703 * Account for any unsignaled completions completed by
704 * this signaled completion. In this case, cidx points
705 * to the first unsignaled one, and idx points to the
706 * signaled one. So adjust in_use based on this delta.
707 * if this is not completing any unsigned wrs, then the
708 * delta will be 0. Handle wrapping also!
710 if (idx
< wq
->sq
.cidx
)
711 wq
->sq
.in_use
-= wq
->sq
.size
+ idx
- wq
->sq
.cidx
;
713 wq
->sq
.in_use
-= idx
- wq
->sq
.cidx
;
715 wq
->sq
.cidx
= (uint16_t)idx
;
716 pr_debug("completing sq idx %u\n", wq
->sq
.cidx
);
717 *cookie
= wq
->sq
.sw_sq
[wq
->sq
.cidx
].wr_id
;
719 c4iw_log_wr_stats(wq
, hw_cqe
);
723 pr_debug("completing rq idx %u\n", wq
->rq
.cidx
);
724 *cookie
= wq
->rq
.sw_rq
[wq
->rq
.cidx
].wr_id
;
726 c4iw_log_wr_stats(wq
, hw_cqe
);
729 *cookie
= reap_srq_cqe(hw_cqe
, srq
);
737 * Flush any completed cqes that are now in-order.
739 flush_completed_wrs(wq
, cq
);
742 if (SW_CQE(hw_cqe
)) {
743 pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
744 cq
, cq
->cqid
, cq
->sw_cidx
);
747 pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
748 cq
, cq
->cqid
, cq
->cidx
);
// __c4iw_poll_cq_one() - poll one CQE and translate it into an ib_wc.
//
// Calls poll_cq() with the work queue of @qhp (NULL when @qhp is NULL) and
// the SRQ work queue of @srq (NULL when @srq is NULL), then fills @wc:
//  - wc->qp is the ibqp of @qhp or NULL, wc->vendor_err the raw CQE status;
//  - when @srq lacks T4_SRQ_LIMIT_SUPPORT, is armed and its in_use count is
//    below srq_limit, an SRQ limit event is dispatched in software;
//  - CQE_TYPE 0 (the group mapped to IB_WC_RECV opcodes): byte_len comes
//    from the CQE when the status is zero; send-with-invalidate variants set
//    ex.invalidate_rkey, the IB_WC_WITH_INVALIDATE flag and invalidate the MR;
//    FW_RI_WRITE_IMMEDIATE reports IB_WC_RECV_RDMA_WITH_IMM with imm_data;
//  - otherwise the opcode maps to IB_WC_RDMA_WRITE, IB_WC_RDMA_READ (with
//    byte_len), IB_WC_SEND, IB_WC_LOCAL_INV or IB_WC_REG_MR; a failed fast
//    register invalidates the MR named in the CQE;
//  - unknown opcodes are reported with pr_err();
//  - wc->status is IB_WC_WR_FLUSH_ERR on the flushed path, otherwise it is
//    derived from the CQE status; unknown status values are logged and
//    mapped to IB_WC_FATAL_ERR.
//
// @chp: completion queue being polled
// @qhp: queue pair the CQE belongs to, or NULL
// @wc:  work completion to fill in
// @srq: shared receive queue of the QP, or NULL
//
// NOTE(review): cqe_flushed, cookie, credit and ret have no visible
// declaration, and most case labels, break statements, the wr_id store and
// the return are not visible in this copy.
754 static int __c4iw_poll_cq_one(struct c4iw_cq
*chp
, struct c4iw_qp
*qhp
,
755 struct ib_wc
*wc
, struct c4iw_srq
*srq
)
757 struct t4_cqe
uninitialized_var(cqe
);
758 struct t4_wq
*wq
= qhp
? &qhp
->wq
: NULL
;
764 ret
= poll_cq(wq
, &(chp
->cq
), &cqe
, &cqe_flushed
, &cookie
, &credit
,
765 srq
? &srq
->wq
: NULL
);
770 wc
->qp
= qhp
? &qhp
->ibqp
: NULL
;
771 wc
->vendor_err
= CQE_STATUS(&cqe
);
775 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
777 if (srq
&& !(srq
->flags
& T4_SRQ_LIMIT_SUPPORT
) && srq
->armed
&&
778 srq
->wq
.in_use
< srq
->srq_limit
)
779 c4iw_dispatch_srq_limit_reached_event(srq
);
781 pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
783 CQE_TYPE(&cqe
), CQE_OPCODE(&cqe
),
784 CQE_STATUS(&cqe
), CQE_LEN(&cqe
),
785 CQE_WRID_HI(&cqe
), CQE_WRID_LOW(&cqe
),
786 (unsigned long long)cookie
);
788 if (CQE_TYPE(&cqe
) == 0) {
789 if (!CQE_STATUS(&cqe
))
790 wc
->byte_len
= CQE_LEN(&cqe
);
794 switch (CQE_OPCODE(&cqe
)) {
796 wc
->opcode
= IB_WC_RECV
;
798 case FW_RI_SEND_WITH_INV
:
799 case FW_RI_SEND_WITH_SE_INV
:
800 wc
->opcode
= IB_WC_RECV
;
801 wc
->ex
.invalidate_rkey
= CQE_WRID_STAG(&cqe
);
802 wc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
803 c4iw_invalidate_mr(qhp
->rhp
, wc
->ex
.invalidate_rkey
);
805 case FW_RI_WRITE_IMMEDIATE
:
806 wc
->opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
807 wc
->ex
.imm_data
= CQE_IMM_DATA(&cqe
);
808 wc
->wc_flags
|= IB_WC_WITH_IMM
;
811 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
812 CQE_OPCODE(&cqe
), CQE_QPID(&cqe
));
817 switch (CQE_OPCODE(&cqe
)) {
818 case FW_RI_WRITE_IMMEDIATE
:
819 case FW_RI_RDMA_WRITE
:
820 wc
->opcode
= IB_WC_RDMA_WRITE
;
823 wc
->opcode
= IB_WC_RDMA_READ
;
824 wc
->byte_len
= CQE_LEN(&cqe
);
826 case FW_RI_SEND_WITH_INV
:
827 case FW_RI_SEND_WITH_SE_INV
:
828 wc
->opcode
= IB_WC_SEND
;
829 wc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
832 case FW_RI_SEND_WITH_SE
:
833 wc
->opcode
= IB_WC_SEND
;
836 case FW_RI_LOCAL_INV
:
837 wc
->opcode
= IB_WC_LOCAL_INV
;
839 case FW_RI_FAST_REGISTER
:
840 wc
->opcode
= IB_WC_REG_MR
;
842 /* Invalidate the MR if the fastreg failed */
843 if (CQE_STATUS(&cqe
) != T4_ERR_SUCCESS
)
844 c4iw_invalidate_mr(qhp
->rhp
,
845 CQE_WRID_FR_STAG(&cqe
));
848 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
849 CQE_OPCODE(&cqe
), CQE_QPID(&cqe
));
856 wc
->status
= IB_WC_WR_FLUSH_ERR
;
859 switch (CQE_STATUS(&cqe
)) {
861 wc
->status
= IB_WC_SUCCESS
;
864 wc
->status
= IB_WC_LOC_ACCESS_ERR
;
867 wc
->status
= IB_WC_LOC_PROT_ERR
;
871 wc
->status
= IB_WC_LOC_ACCESS_ERR
;
874 wc
->status
= IB_WC_GENERAL_ERR
;
877 wc
->status
= IB_WC_LOC_LEN_ERR
;
879 case T4_ERR_INVALIDATE_SHARED_MR
:
880 case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND
:
881 wc
->status
= IB_WC_MW_BIND_ERR
;
885 case T4_ERR_PDU_LEN_ERR
:
886 case T4_ERR_OUT_OF_RQE
:
887 case T4_ERR_DDP_VERSION
:
888 case T4_ERR_RDMA_VERSION
:
889 case T4_ERR_DDP_QUEUE_NUM
:
893 case T4_ERR_MSN_RANGE
:
894 case T4_ERR_IRD_OVERFLOW
:
896 case T4_ERR_INTERNAL_ERR
:
897 wc
->status
= IB_WC_FATAL_ERR
;
900 wc
->status
= IB_WC_WR_FLUSH_ERR
;
903 pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
904 CQE_STATUS(&cqe
), CQE_QPID(&cqe
));
905 wc
->status
= IB_WC_FATAL_ERR
;
913 * Get one cq entry from c4iw and map it to openib.
918 * -EAGAIN caller must try again
919 * any other -errno fatal error
/*
 * c4iw_poll_cq_one() - poll one completion, taking the QP and SRQ locks.
 *
 * Peeks at the next CQE with t4_next_cqe() and looks up the owning QP with
 * get_qhp(). Two calls to __c4iw_poll_cq_one() are visible:
 *  - with the QP (and SRQ) found: qhp->lock is taken first, then srq->lock,
 *    and they are released after the call;
 *  - with NULL for both the QP and the SRQ, without taking either lock.
 *
 * @chp: completion queue being polled; the caller is presumably expected to
 *       hold chp->lock, as c4iw_poll_cq() does - confirm
 * @wc:  work completion to fill in
 *
 * NOTE(review): ret has no visible declaration, srq is never visibly
 * assigned after its NULL initialisation, and the conditions selecting
 * between the two calls and guarding the SRQ lock are not visible in this
 * copy.
 */
921 static int c4iw_poll_cq_one(struct c4iw_cq
*chp
, struct ib_wc
*wc
)
923 struct c4iw_srq
*srq
= NULL
;
924 struct c4iw_qp
*qhp
= NULL
;
925 struct t4_cqe
*rd_cqe
;
928 ret
= t4_next_cqe(&chp
->cq
, &rd_cqe
);
933 qhp
= get_qhp(chp
->rhp
, CQE_QPID(rd_cqe
));
935 spin_lock(&qhp
->lock
);
938 spin_lock(&srq
->lock
);
939 ret
= __c4iw_poll_cq_one(chp
, qhp
, wc
, srq
);
940 spin_unlock(&qhp
->lock
);
942 spin_unlock(&srq
->lock
);
944 ret
= __c4iw_poll_cq_one(chp
, NULL
, wc
, NULL
);
/*
 * c4iw_poll_cq() - poll up to @num_entries completions from a CQ.
 *
 * Holds chp->lock with interrupts saved for the whole operation. For each
 * slot, c4iw_poll_cq_one() is retried for as long as it returns -EAGAIN, and
 * the result is written to wc[npolled].
 *
 * @ibcq:        completion queue to poll
 * @num_entries: capacity of the @wc array
 * @wc:          array that receives the work completions
 *
 * Return: the number of completions polled when the last result was 0 or
 * -ENODATA, otherwise the negative error from c4iw_poll_cq_one().
 *
 * NOTE(review): chp, flags, npolled and err have no visible declaration, and
 * the opening of the do/while and the early exit on error are not visible in
 * this copy.
 */
949 int c4iw_poll_cq(struct ib_cq
*ibcq
, int num_entries
, struct ib_wc
*wc
)
956 chp
= to_c4iw_cq(ibcq
);
958 spin_lock_irqsave(&chp
->lock
, flags
);
959 for (npolled
= 0; npolled
< num_entries
; ++npolled
) {
961 err
= c4iw_poll_cq_one(chp
, wc
+ npolled
);
962 } while (err
== -EAGAIN
);
966 spin_unlock_irqrestore(&chp
->lock
, flags
);
967 return !err
|| err
== -ENODATA
? npolled
: err
;
/*
 * c4iw_destroy_cq() - verbs entry point that destroys a completion queue.
 *
 * Removes the CQ from the device xarray keyed by cqid, drops one reference
 * and waits on chp->wait until the reference count reaches zero. It then
 * derives the user context from @udata and calls destroy_cq() with that
 * context, or with the device-wide uctx when there is none, passing the
 * preallocated chp->destroy_skb and chp->wr_waitp. Finally the wait object
 * reference is released with c4iw_put_wr_wait().
 *
 * @ib_cq: completion queue to destroy
 * @udata: user data; used only to find the owning user context
 *
 * NOTE(review): chp has no visible declaration and the last argument of
 * rdma_udata_to_drv_context() is not visible in this copy.
 */
970 void c4iw_destroy_cq(struct ib_cq
*ib_cq
, struct ib_udata
*udata
)
973 struct c4iw_ucontext
*ucontext
;
975 pr_debug("ib_cq %p\n", ib_cq
);
976 chp
= to_c4iw_cq(ib_cq
);
/* Unpublish the CQ, then wait for every other reference to go away. */
978 xa_erase_irq(&chp
->rhp
->cqs
, chp
->cq
.cqid
);
979 atomic_dec(&chp
->refcnt
);
980 wait_event(chp
->wait
, !atomic_read(&chp
->refcnt
));
982 ucontext
= rdma_udata_to_drv_context(udata
, struct c4iw_ucontext
,
984 destroy_cq(&chp
->rhp
->rdev
, &chp
->cq
,
985 ucontext
? &ucontext
->uctx
: &chp
->cq
.rdev
->uctx
,
986 chp
->destroy_skb
, chp
->wr_waitp
);
987 c4iw_put_wr_wait(chp
->wr_waitp
);
/*
 * c4iw_create_cq() - verbs entry point that creates a completion queue.
 *
 * Visible steps, in order:
 *  - the completion vector is checked against rhp->rdev.lldi.nciq;
 *  - a user request shorter than struct c4iw_create_cq marks the context as
 *    using 32-byte CQEs (ucontext->is_32b_cqe = 1);
 *  - a wait object and an skb sized for one FW_RI_RES_WR are allocated up
 *    front and kept in chp->wr_waitp and chp->destroy_skb;
 *  - entries is rounded up to a multiple of 16; hwentries is twice that,
 *    capped at t4_max_iq_size; memsize is hwentries times the CQE size
 *    (halved for 32-byte CQEs) and is rounded up to PAGE_SIZE;
 *  - create_cq() sets up the queue; afterwards cq.size is reduced by one for
 *    the status page and ibcq.cqe is reported as entries - 2;
 *  - locks, the reference count (initially 1) and the wait queue are
 *    initialised and the CQ is inserted into rhp->cqs under its cqid;
 *  - on the user path two mmap entries are allocated, a response is filled
 *    in (qid_mask, cqid, size, memsize, two keys taken from ucontext->key in
 *    PAGE_SIZE steps under mmap_lock, and the C4IW_64B_CQE flag) and copied
 *    to user space - without the flags field for 32-byte CQE contexts; the
 *    entries then map the queue memory and one page at the BAR2 address;
 *  - the trailing xa_erase_irq(), destroy_cq(), kfree_skb() and
 *    c4iw_put_wr_wait() calls look like the error unwind, in reverse order of
 *    setup (TODO confirm - most labels are not visible).
 *
 * @ibcq:  completion queue object to initialise
 * @attr:  requested number of entries (cqe) and completion vector
 * @udata: user request and response buffers, NULL-equivalent for kernel CQs
 *
 * NOTE(review): ret and wr_len have no visible declaration, and most guards,
 * adjustments to entries, goto statements, labels and returns are not
 * visible in this copy. A comment opener further down has lost its
 * terminator, so no comments are added inside the body.
 */
990 int c4iw_create_cq(struct ib_cq
*ibcq
, const struct ib_cq_init_attr
*attr
,
991 struct ib_udata
*udata
)
993 struct ib_device
*ibdev
= ibcq
->device
;
994 int entries
= attr
->cqe
;
995 int vector
= attr
->comp_vector
;
996 struct c4iw_dev
*rhp
= to_c4iw_dev(ibcq
->device
);
997 struct c4iw_cq
*chp
= to_c4iw_cq(ibcq
);
998 struct c4iw_create_cq ucmd
;
999 struct c4iw_create_cq_resp uresp
;
1001 size_t memsize
, hwentries
;
1002 struct c4iw_mm_entry
*mm
, *mm2
;
1003 struct c4iw_ucontext
*ucontext
= rdma_udata_to_drv_context(
1004 udata
, struct c4iw_ucontext
, ibucontext
);
1006 pr_debug("ib_dev %p entries %d\n", ibdev
, entries
);
1010 if (vector
>= rhp
->rdev
.lldi
.nciq
)
1014 if (udata
->inlen
< sizeof(ucmd
))
1015 ucontext
->is_32b_cqe
= 1;
1018 chp
->wr_waitp
= c4iw_alloc_wr_wait(GFP_KERNEL
);
1019 if (!chp
->wr_waitp
) {
1023 c4iw_init_wr_wait(chp
->wr_waitp
);
1025 wr_len
= sizeof(struct fw_ri_res_wr
) + sizeof(struct fw_ri_res
);
1026 chp
->destroy_skb
= alloc_skb(wr_len
, GFP_KERNEL
);
1027 if (!chp
->destroy_skb
) {
1029 goto err_free_wr_wait
;
1032 /* account for the status page. */
1035 /* IQ needs one extra entry to differentiate full vs empty. */
1039 * entries must be multiple of 16 for HW.
1041 entries
= roundup(entries
, 16);
1044 * Make actual HW queue 2x to avoid cdix_inc overflows.
1046 hwentries
= min(entries
* 2, rhp
->rdev
.hw_queue
.t4_max_iq_size
);
1049 * Make HW queue at least 64 entries so GTS updates aren't too
1055 memsize
= hwentries
* ((ucontext
&& ucontext
->is_32b_cqe
) ?
1056 (sizeof(*chp
->cq
.queue
) / 2) : sizeof(*chp
->cq
.queue
));
1059 * memsize must be a multiple of the page size if its a user cq.
1062 memsize
= roundup(memsize
, PAGE_SIZE
);
1064 chp
->cq
.size
= hwentries
;
1065 chp
->cq
.memsize
= memsize
;
1066 chp
->cq
.vector
= vector
;
1068 ret
= create_cq(&rhp
->rdev
, &chp
->cq
,
1069 ucontext
? &ucontext
->uctx
: &rhp
->rdev
.uctx
,
1075 chp
->cq
.size
--; /* status page */
1076 chp
->ibcq
.cqe
= entries
- 2;
1077 spin_lock_init(&chp
->lock
);
1078 spin_lock_init(&chp
->comp_handler_lock
);
1079 atomic_set(&chp
->refcnt
, 1);
1080 init_waitqueue_head(&chp
->wait
);
1081 ret
= xa_insert_irq(&rhp
->cqs
, chp
->cq
.cqid
, chp
, GFP_KERNEL
);
1083 goto err_destroy_cq
;
1087 mm
= kmalloc(sizeof(*mm
), GFP_KERNEL
);
1089 goto err_remove_handle
;
1090 mm2
= kmalloc(sizeof(*mm2
), GFP_KERNEL
);
1094 memset(&uresp
, 0, sizeof(uresp
));
1095 uresp
.qid_mask
= rhp
->rdev
.cqmask
;
1096 uresp
.cqid
= chp
->cq
.cqid
;
1097 uresp
.size
= chp
->cq
.size
;
1098 uresp
.memsize
= chp
->cq
.memsize
;
1099 spin_lock(&ucontext
->mmap_lock
);
1100 uresp
.key
= ucontext
->key
;
1101 ucontext
->key
+= PAGE_SIZE
;
1102 uresp
.gts_key
= ucontext
->key
;
1103 ucontext
->key
+= PAGE_SIZE
;
1104 /* communicate to the userspace that
1105 * kernel driver supports 64B CQE
1107 uresp
.flags
|= C4IW_64B_CQE
;
1109 spin_unlock(&ucontext
->mmap_lock
);
1110 ret
= ib_copy_to_udata(udata
, &uresp
,
1111 ucontext
->is_32b_cqe
?
1112 sizeof(uresp
) - sizeof(uresp
.flags
) :
1117 mm
->key
= uresp
.key
;
1118 mm
->addr
= virt_to_phys(chp
->cq
.queue
);
1119 mm
->len
= chp
->cq
.memsize
;
1120 insert_mmap(ucontext
, mm
);
1122 mm2
->key
= uresp
.gts_key
;
1123 mm2
->addr
= chp
->cq
.bar2_pa
;
1124 mm2
->len
= PAGE_SIZE
;
1125 insert_mmap(ucontext
, mm2
);
1128 pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
1129 chp
->cq
.cqid
, chp
, chp
->cq
.size
, chp
->cq
.memsize
,
1137 xa_erase_irq(&rhp
->cqs
, chp
->cq
.cqid
);
1139 destroy_cq(&chp
->rhp
->rdev
, &chp
->cq
,
1140 ucontext
? &ucontext
->uctx
: &rhp
->rdev
.uctx
,
1141 chp
->destroy_skb
, chp
->wr_waitp
);
1143 kfree_skb(chp
->destroy_skb
);
1145 c4iw_put_wr_wait(chp
->wr_waitp
);
// c4iw_arm_cq() - verbs entry point that requests a completion notification.
//
// Runs under chp->lock with interrupts saved. The visible argument
// (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED tells the arming call
// whether only solicited completions should raise an event. When
// IB_CQ_REPORT_MISSED_EVENTS is set in @flags, ret is taken from
// t4_cq_notempty() for the CQ.
//
// @ibcq:  completion queue to arm
// @flags: notification flags from the verbs layer
//
// NOTE(review): ret and flag have no visible declaration, and the call that
// receives the solicited-only argument as well as the return statement are
// not visible in this copy.
1150 int c4iw_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags flags
)
1152 struct c4iw_cq
*chp
;
1156 chp
= to_c4iw_cq(ibcq
);
1157 spin_lock_irqsave(&chp
->lock
, flag
);
1159 (flags
& IB_CQ_SOLICITED_MASK
) == IB_CQ_SOLICITED
);
1160 if (flags
& IB_CQ_REPORT_MISSED_EVENTS
)
1161 ret
= t4_cq_notempty(&chp
->cq
);
1162 spin_unlock_irqrestore(&chp
->lock
, flag
);
1166 void c4iw_flush_srqidx(struct c4iw_qp
*qhp
, u32 srqidx
)
1168 struct c4iw_cq
*rchp
= to_c4iw_cq(qhp
->ibqp
.recv_cq
);
1171 /* locking heirarchy: cq lock first, then qp lock. */
1172 spin_lock_irqsave(&rchp
->lock
, flag
);
1173 spin_lock(&qhp
->lock
);
1175 /* create a SRQ RECV CQE for srqidx */
1176 insert_recv_cqe(&qhp
->wq
, &rchp
->cq
, srqidx
);
1178 spin_unlock(&qhp
->lock
);
1179 spin_unlock_irqrestore(&rchp
->lock
, flag
);