/*
 * Copyright (c) 2005, 2006 PathScale, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "ipath_verbs.h"
/*
 * Convert the AETH RNR timeout code into the number of milliseconds.
 */
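/*
 * The RNR NAK timer field in an AETH is a 5-bit code, which is why the
 * table below has exactly 32 entries.  Per the InfiniBand spec, code 0
 * encodes 655.36 ms, code 1 encodes 0.01 ms, and the remaining codes
 * step back up to 491.52 ms at code 31; each entry is presumably that
 * delay expressed in whole milliseconds.
 */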
const u32 ib_ipath_rnr_table[32] = {
/**
 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
 * @qp: the QP
 *
 * XXX Use a simple list for now.  We might need a priority
 * queue if we have lots of QPs waiting for RNR timeouts
 * but that should be rare.
 */
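/*
 * Entries on dev->rnrwait store s_rnr_timeout relative to the QPs
 * queued ahead of them: the insertion loop below subtracts each
 * earlier QP's remaining time from the new QP before linking it in,
 * so the RNR timer only has to age the entry at the head of the list.
 */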
void ipath_insert_rnr_queue(struct ipath_qp *qp)
{
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        unsigned long flags;

        spin_lock_irqsave(&dev->pending_lock, flags);
        if (list_empty(&dev->rnrwait))
                list_add(&qp->timerwait, &dev->rnrwait);
        else {
                struct list_head *l = &dev->rnrwait;
                struct ipath_qp *nqp = list_entry(l->next, struct ipath_qp,
                                                  timerwait);

                while (qp->s_rnr_timeout >= nqp->s_rnr_timeout) {
                        qp->s_rnr_timeout -= nqp->s_rnr_timeout;
                        l = l->next;
                        if (l->next == &dev->rnrwait)
                                break;
                        nqp = list_entry(l->next, struct ipath_qp,
                                         timerwait);
                }
                list_add(&qp->timerwait, l);
        }
        spin_unlock_irqrestore(&dev->pending_lock, flags);
}
/**
 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update wr_id only, not SGEs
 *
 * Return 0 if no RWQE is available, otherwise return 1.
 *
 * Called at interrupt level with the QP r_rq.lock held.
 */
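/*
 * The receive queue is a ring indexed by head (producer) and tail
 * (consumer): tail == head means empty and tail wraps to zero at
 * rq->size.  When the QP uses an SRQ, the SRQ's ring is consumed
 * instead, and if that drops the number of posted RWQEs below
 * srq->limit, an IB_EVENT_SRQ_LIMIT_REACHED event is delivered to the
 * consumer's event handler.
 */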
int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
{
        struct ipath_rq *rq;
        struct ipath_srq *srq;
        struct ipath_rwqe *wqe;
        int ret;

        if (!qp->ibqp.srq) {
                rq = &qp->r_rq;
                if (unlikely(rq->tail == rq->head)) {
                        ret = 0;
                        goto bail;
                }
                wqe = get_rwqe_ptr(rq, rq->tail);
                qp->r_wr_id = wqe->wr_id;
                if (!wr_id_only) {
                        qp->r_sge.sge = wqe->sg_list[0];
                        qp->r_sge.sg_list = wqe->sg_list + 1;
                        qp->r_sge.num_sge = wqe->num_sge;
                        qp->r_len = wqe->length;
                }
                if (++rq->tail >= rq->size)
                        rq->tail = 0;
                ret = 1;
                goto bail;
        }

        srq = to_isrq(qp->ibqp.srq);
        rq = &srq->rq;
        spin_lock(&rq->lock);
        if (unlikely(rq->tail == rq->head)) {
                spin_unlock(&rq->lock);
                ret = 0;
                goto bail;
        }
        wqe = get_rwqe_ptr(rq, rq->tail);
        qp->r_wr_id = wqe->wr_id;
        if (!wr_id_only) {
                qp->r_sge.sge = wqe->sg_list[0];
                qp->r_sge.sg_list = wqe->sg_list + 1;
                qp->r_sge.num_sge = wqe->num_sge;
                qp->r_len = wqe->length;
        }
        if (++rq->tail >= rq->size)
                rq->tail = 0;
        if (srq->ibsrq.event_handler) {
                struct ib_event ev;
                u32 n;

                if (rq->head < rq->tail)
                        n = rq->size + rq->head - rq->tail;
                else
                        n = rq->head - rq->tail;
                if (n < srq->limit) {
                        srq->limit = 0;
                        spin_unlock(&rq->lock);
                        ev.device = qp->ibqp.device;
                        ev.element.srq = qp->ibqp.srq;
                        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
                        srq->ibsrq.event_handler(&ev,
                                                 srq->ibsrq.srq_context);
                } else
                        spin_unlock(&rq->lock);
        } else
                spin_unlock(&rq->lock);
        ret = 1;

bail:
        return ret;
}
/**
 * ipath_ruc_loopback - handle UC and RC loopback requests
 * @sqp: the loopback QP
 * @wc: the work completion entry
 *
 * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
 * forward a WQE addressed to the same HCA.
 * Note that although we are single threaded due to the tasklet, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
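/*
 * The loop below emulates the receive side in software: look up the
 * destination QP by sqp->remote_qpn on this HCA, take the next SWQE
 * off sqp's send queue, consume an RWQE or validate an rkey on the
 * destination QP, copy the payload with ipath_copy_sge(), and then
 * generate the receive and send completions that the wire path would
 * normally produce.
 */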
void ipath_ruc_loopback(struct ipath_qp *sqp, struct ib_wc *wc)
{
        struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
        struct ipath_qp *qp;
        struct ipath_swqe *wqe;
        struct ipath_sge *sge;
        unsigned long flags;
        u64 sdata;

        qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
        if (!qp) {
                dev->n_pkt_drops++;
                return;
        }

again:
        spin_lock_irqsave(&sqp->s_lock, flags);

        if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK)) {
                spin_unlock_irqrestore(&sqp->s_lock, flags);
                goto done;
        }

        /* Get the next send request. */
        if (sqp->s_last == sqp->s_head) {
                /* Send work queue is empty. */
                spin_unlock_irqrestore(&sqp->s_lock, flags);
                goto done;
        }

        /*
         * We can rely on the entry not changing without the s_lock
         * being held until we update s_last.
         */
        wqe = get_swqe_ptr(sqp, sqp->s_last);
        spin_unlock_irqrestore(&sqp->s_lock, flags);

        wc->wc_flags = 0;
        wc->imm_data = 0;

        sqp->s_sge.sge = wqe->sg_list[0];
        sqp->s_sge.sg_list = wqe->sg_list + 1;
        sqp->s_sge.num_sge = wqe->wr.num_sge;
        sqp->s_len = wqe->length;
        switch (wqe->wr.opcode) {
        case IB_WR_SEND_WITH_IMM:
                wc->wc_flags = IB_WC_WITH_IMM;
                wc->imm_data = wqe->wr.imm_data;
                /* FALLTHROUGH */
        case IB_WR_SEND:
                spin_lock_irqsave(&qp->r_rq.lock, flags);
                if (!ipath_get_rwqe(qp, 0)) {
                rnr_nak:
                        spin_unlock_irqrestore(&qp->r_rq.lock, flags);
                        /* Handle RNR NAK */
                        if (qp->ibqp.qp_type == IB_QPT_UC)
                                goto send_comp;
                        if (sqp->s_rnr_retry == 0) {
                                wc->status = IB_WC_RNR_RETRY_EXC_ERR;
                                goto err;
                        }
                        if (sqp->s_rnr_retry_cnt < 7)
                                sqp->s_rnr_retry--;
                        dev->n_rnr_naks++;
                        sqp->s_rnr_timeout =
                                ib_ipath_rnr_table[sqp->s_min_rnr_timer];
                        ipath_insert_rnr_queue(sqp);
                        goto done;
                }
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
                break;

        case IB_WR_RDMA_WRITE_WITH_IMM:
                wc->wc_flags = IB_WC_WITH_IMM;
                wc->imm_data = wqe->wr.imm_data;
                spin_lock_irqsave(&qp->r_rq.lock, flags);
                if (!ipath_get_rwqe(qp, 1))
                        goto rnr_nak;
                spin_unlock_irqrestore(&qp->r_rq.lock, flags);
                /* FALLTHROUGH */
        case IB_WR_RDMA_WRITE:
                if (wqe->length == 0)
                        break;
                if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
                                            wqe->wr.wr.rdma.remote_addr,
                                            wqe->wr.wr.rdma.rkey,
                                            IB_ACCESS_REMOTE_WRITE))) {
                acc_err:
                        wc->status = IB_WC_REM_ACCESS_ERR;
                err:
                        wc->wr_id = wqe->wr.wr_id;
                        wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
                        wc->vendor_err = 0;
                        wc->byte_len = 0;
                        wc->qp_num = sqp->ibqp.qp_num;
                        wc->src_qp = sqp->remote_qpn;
                        wc->pkey_index = 0;
                        wc->slid = sqp->remote_ah_attr.dlid;
                        wc->sl = sqp->remote_ah_attr.sl;
                        wc->dlid_path_bits = 0;
                        wc->port_num = 0;
                        ipath_sqerror_qp(sqp, wc);
                        goto done;
                }
                break;

        case IB_WR_RDMA_READ:
                if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
                                            wqe->wr.wr.rdma.remote_addr,
                                            wqe->wr.wr.rdma.rkey,
                                            IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                if (unlikely(!(qp->qp_access_flags &
                               IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                qp->r_sge.sge = wqe->sg_list[0];
                qp->r_sge.sg_list = wqe->sg_list + 1;
                qp->r_sge.num_sge = wqe->wr.num_sge;
                break;

        case IB_WR_ATOMIC_CMP_AND_SWP:
        case IB_WR_ATOMIC_FETCH_AND_ADD:
                if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
                                            wqe->wr.wr.rdma.remote_addr,
                                            wqe->wr.wr.rdma.rkey,
                                            IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
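                /*
                 * The atomic is emulated in software: the read-modify-write
                 * below is done under dev->pending_lock rather than with a
                 * hardware atomic, which serializes loopback atomics on this
                 * device against each other.
                 */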
                sdata = wqe->wr.wr.atomic.swap;
                spin_lock_irqsave(&dev->pending_lock, flags);
                qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
                if (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
                        *(u64 *) qp->r_sge.sge.vaddr =
                                qp->r_atomic_data + sdata;
                else if (qp->r_atomic_data == wqe->wr.wr.atomic.compare_add)
                        *(u64 *) qp->r_sge.sge.vaddr = sdata;
                spin_unlock_irqrestore(&dev->pending_lock, flags);
                *(u64 *) sqp->s_sge.sge.vaddr = qp->r_atomic_data;
                goto send_comp;

        default:
                goto done;
        }

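        /*
         * Copy the payload from the sender's SGE list into the receiver's
         * r_sge, advancing through the memory region's segment maps
         * (IPATH_SEGSZ segments per map entry) as each SGE is used up.
         */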
        sge = &sqp->s_sge.sge;
        while (sqp->s_len) {
                u32 len = sqp->s_len;

                if (len > sge->length)
                        len = sge->length;
                BUG_ON(len == 0);
                ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
                sge->vaddr += len;
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
                        if (--sqp->s_sge.num_sge)
                                *sge = *sqp->s_sge.sg_list++;
                } else if (sge->length == 0 && sge->mr != NULL) {
                        if (++sge->n >= IPATH_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
                                        break;
                                sge->n = 0;
                        }
                        sge->vaddr =
                                sge->mr->map[sge->m]->segs[sge->n].vaddr;
                        sge->length =
                                sge->mr->map[sge->m]->segs[sge->n].length;
                }
                sqp->s_len -= len;
        }

        if (wqe->wr.opcode == IB_WR_RDMA_WRITE ||
            wqe->wr.opcode == IB_WR_RDMA_READ)
                goto send_comp;

        if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
                wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
        else
                wc->opcode = IB_WC_RECV;
        wc->wr_id = qp->r_wr_id;
        wc->status = IB_WC_SUCCESS;
        wc->vendor_err = 0;
        wc->byte_len = wqe->length;
        wc->qp_num = qp->ibqp.qp_num;
        wc->src_qp = qp->remote_qpn;
        /* XXX do we know which pkey matched? Only needed for GSI. */
        wc->pkey_index = 0;
        wc->slid = qp->remote_ah_attr.dlid;
        wc->sl = qp->remote_ah_attr.sl;
        wc->dlid_path_bits = 0;
        /* Signal completion event if the solicited bit is set. */
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
                       wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;

        if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &sqp->s_flags) ||
            (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
                wc->wr_id = wqe->wr.wr_id;
                wc->status = IB_WC_SUCCESS;
                wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
                wc->vendor_err = 0;
                wc->byte_len = wqe->length;
                wc->qp_num = sqp->ibqp.qp_num;
                wc->src_qp = 0;
                wc->pkey_index = 0;
                wc->slid = 0;
                wc->sl = 0;
                wc->dlid_path_bits = 0;
                wc->port_num = 0;
                ipath_cq_enter(to_icq(sqp->ibqp.send_cq), wc, 0);
        }

        /* Update s_last now that we are finished with the SWQE */
        spin_lock_irqsave(&sqp->s_lock, flags);
        if (++sqp->s_last >= sqp->s_size)
                sqp->s_last = 0;
        spin_unlock_irqrestore(&sqp->s_lock, flags);
        goto again;

done:
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
}
/**
 * ipath_no_bufs_available - tell the layer driver we need buffers
 * @qp: the QP that caused the problem
 * @dev: the device we ran out of buffers on
 *
 * Called when we run out of PIO buffers.
 */
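/*
 * The QP parks itself on dev->piowait and clears IPATH_S_BUSY so the
 * send tasklet stops; ipath_ib_piobufavail() is presumably what
 * reschedules the tasklet once the hardware frees a PIO buffer.
 */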
void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
{
        unsigned long flags;

        spin_lock_irqsave(&dev->pending_lock, flags);
        if (list_empty(&qp->piowait))
                list_add_tail(&qp->piowait, &dev->piowait);
        spin_unlock_irqrestore(&dev->pending_lock, flags);
        /*
         * Note that as soon as ipath_layer_want_buffer() is called and
         * possibly before it returns, ipath_ib_piobufavail()
         * could be called.  If we are still in the tasklet function,
         * tasklet_hi_schedule() will not call us until the next time
         * tasklet_hi_schedule() is called.
         * We clear the tasklet flag now since we are committing to return
         * from the tasklet function.
         */
        clear_bit(IPATH_S_BUSY, &qp->s_flags);
        tasklet_unlock(&qp->s_task);
        ipath_layer_want_buffer(dev->dd);
}
/**
 * ipath_post_rc_send - post RC and UC sends
 * @qp: the QP to post on
 * @wr: the work request to send
 */
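/*
 * Validates the opcode and SGE list for the QP type, copies the work
 * request into the next free slot of the software send queue, and then
 * kicks the UC or RC send engine.  Returns 0 on success or a negative
 * errno if the request is rejected.
 */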
int ipath_post_rc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
        struct ipath_swqe *wqe;
        unsigned long flags;
        u32 next;
        u32 acc;
        int i, j;
        int ret;

        /*
         * Don't allow RDMA reads or atomic operations on UC or
         * undefined operations.
         * Make sure buffer is large enough to hold the result for atomics.
         */
        if (qp->ibqp.qp_type == IB_QPT_UC) {
                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
                        ret = -EINVAL;
                        goto bail;
                }
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
                ret = -EINVAL;
                goto bail;
        } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
                   (wr->num_sge == 0 ||
                    wr->sg_list[0].length < sizeof(u64) ||
                    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
                ret = -EINVAL;
                goto bail;
        }

        /* IB spec says that num_sge == 0 is OK. */
        if (wr->num_sge > qp->s_max_sge) {
                ret = -EINVAL;
                goto bail;
        }

        spin_lock_irqsave(&qp->s_lock, flags);
        next = qp->s_head + 1;
        if (next >= qp->s_size)
                next = 0;
        if (next == qp->s_last) {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -ENOMEM;
                goto bail;
        }

        wqe = get_swqe_ptr(qp, qp->s_head);
        wqe->wr = *wr;
        wqe->ssn = qp->s_ssn++;
        wqe->sg_list[0].mr = NULL;
        wqe->sg_list[0].vaddr = NULL;
        wqe->sg_list[0].length = 0;
        wqe->sg_list[0].sge_length = 0;
        wqe->length = 0;
        acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
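        /*
         * Translate each ib_sge into an ipath_sge: zero-length entries are
         * skipped, lkey 0 is rejected for user PDs, and ipath_lkey_ok()
         * resolves each lkey against the device's lkey table while the
         * total payload length is accumulated in wqe->length.
         */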
        for (i = 0, j = 0; i < wr->num_sge; i++) {
                if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
                        spin_unlock_irqrestore(&qp->s_lock, flags);
                        ret = -EINVAL;
                        goto bail;
                }
                if (wr->sg_list[i].length == 0)
                        continue;
                if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
                                   &wqe->sg_list[j], &wr->sg_list[i],
                                   acc)) {
                        spin_unlock_irqrestore(&qp->s_lock, flags);
                        ret = -EINVAL;
                        goto bail;
                }
                wqe->length += wr->sg_list[i].length;
                j++;
        }
        wqe->wr.num_sge = j;
        qp->s_head = next;
        spin_unlock_irqrestore(&qp->s_lock, flags);

        if (qp->ibqp.qp_type == IB_QPT_UC)
                ipath_do_uc_send((unsigned long) qp);
        else
                ipath_do_rc_send((unsigned long) qp);

        ret = 0;

bail:
        return ret;
}