/*
 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/spinlock.h>

#include "ipath_verbs.h"
#include "ipath_kernel.h"

40 * Convert the AETH RNR timeout code into the number of milliseconds.
42 const u32 ib_ipath_rnr_table
[32] = {
78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
81 * XXX Use a simple list for now. We might need a priority
82 * queue if we have lots of QPs waiting for RNR timeouts
83 * but that should be rare.
85 void ipath_insert_rnr_queue(struct ipath_qp
*qp
)
87 struct ipath_ibdev
*dev
= to_idev(qp
->ibqp
.device
);
90 spin_lock_irqsave(&dev
->pending_lock
, flags
);
91 if (list_empty(&dev
->rnrwait
))
92 list_add(&qp
->timerwait
, &dev
->rnrwait
);
94 struct list_head
*l
= &dev
->rnrwait
;
95 struct ipath_qp
*nqp
= list_entry(l
->next
, struct ipath_qp
,
98 while (qp
->s_rnr_timeout
>= nqp
->s_rnr_timeout
) {
99 qp
->s_rnr_timeout
-= nqp
->s_rnr_timeout
;
101 if (l
->next
== &dev
->rnrwait
)
103 nqp
= list_entry(l
->next
, struct ipath_qp
,
106 list_add(&qp
->timerwait
, l
);
108 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
112 * ipath_init_sge - Validate a RWQE and fill in the SGE state
117 int ipath_init_sge(struct ipath_qp
*qp
, struct ipath_rwqe
*wqe
,
118 u32
*lengthp
, struct ipath_sge_state
*ss
)
124 for (i
= j
= 0; i
< wqe
->num_sge
; i
++) {
125 if (wqe
->sg_list
[i
].length
== 0)
128 if (!ipath_lkey_ok(qp
, j
? &ss
->sg_list
[j
- 1] : &ss
->sge
,
129 &wqe
->sg_list
[i
], IB_ACCESS_LOCAL_WRITE
))
131 *lengthp
+= wqe
->sg_list
[i
].length
;
139 wc
.wr_id
= wqe
->wr_id
;
140 wc
.status
= IB_WC_LOC_PROT_ERR
;
141 wc
.opcode
= IB_WC_RECV
;
151 wc
.dlid_path_bits
= 0;
153 /* Signal solicited completion event. */
154 ipath_cq_enter(to_icq(qp
->ibqp
.recv_cq
), &wc
, 1);
161 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
163 * @wr_id_only: update wr_id only, not SGEs
165 * Return 0 if no RWQE is available, otherwise return 1.
167 * Can be called from interrupt level.
169 int ipath_get_rwqe(struct ipath_qp
*qp
, int wr_id_only
)
173 struct ipath_rwq
*wq
;
174 struct ipath_srq
*srq
;
175 struct ipath_rwqe
*wqe
;
176 void (*handler
)(struct ib_event
*, void *);
180 qp
->r_sge
.sg_list
= qp
->r_sg_list
;
183 srq
= to_isrq(qp
->ibqp
.srq
);
184 handler
= srq
->ibsrq
.event_handler
;
192 spin_lock_irqsave(&rq
->lock
, flags
);
195 /* Validate tail before using it since it is user writable. */
196 if (tail
>= rq
->size
)
199 if (unlikely(tail
== wq
->head
)) {
200 spin_unlock_irqrestore(&rq
->lock
, flags
);
204 /* Make sure entry is read after head index is read. */
206 wqe
= get_rwqe_ptr(rq
, tail
);
207 if (++tail
>= rq
->size
)
209 } while (!wr_id_only
&& !ipath_init_sge(qp
, wqe
, &qp
->r_len
,
211 qp
->r_wr_id
= wqe
->wr_id
;
215 qp
->r_wrid_valid
= 1;
220 * validate head pointer value and compute
221 * the number of remaining WQEs.
227 n
+= rq
->size
- tail
;
230 if (n
< srq
->limit
) {
234 spin_unlock_irqrestore(&rq
->lock
, flags
);
235 ev
.device
= qp
->ibqp
.device
;
236 ev
.element
.srq
= qp
->ibqp
.srq
;
237 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
238 handler(&ev
, srq
->ibsrq
.srq_context
);
242 spin_unlock_irqrestore(&rq
->lock
, flags
);
249 * ipath_ruc_loopback - handle UC and RC lookback requests
250 * @sqp: the sending QP
252 * This is called from ipath_do_send() to
253 * forward a WQE addressed to the same HCA.
254 * Note that although we are single threaded due to the tasklet, we still
255 * have to protect against post_send(). We don't have to worry about
256 * receive interrupts since this is a connected protocol and all packets
257 * will pass through here.
259 static void ipath_ruc_loopback(struct ipath_qp
*sqp
)
261 struct ipath_ibdev
*dev
= to_idev(sqp
->ibqp
.device
);
263 struct ipath_swqe
*wqe
;
264 struct ipath_sge
*sge
;
270 qp
= ipath_lookup_qpn(&dev
->qp_table
, sqp
->remote_qpn
);
277 spin_lock_irqsave(&sqp
->s_lock
, flags
);
279 if (!(ib_ipath_state_ops
[sqp
->state
] & IPATH_PROCESS_SEND_OK
) ||
280 sqp
->s_rnr_timeout
) {
281 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
285 /* Get the next send request. */
286 if (sqp
->s_last
== sqp
->s_head
) {
287 /* Send work queue is empty. */
288 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
293 * We can rely on the entry not changing without the s_lock
294 * being held until we update s_last.
296 wqe
= get_swqe_ptr(sqp
, sqp
->s_last
);
297 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
302 sqp
->s_sge
.sge
= wqe
->sg_list
[0];
303 sqp
->s_sge
.sg_list
= wqe
->sg_list
+ 1;
304 sqp
->s_sge
.num_sge
= wqe
->wr
.num_sge
;
305 sqp
->s_len
= wqe
->length
;
306 switch (wqe
->wr
.opcode
) {
307 case IB_WR_SEND_WITH_IMM
:
308 wc
.wc_flags
= IB_WC_WITH_IMM
;
309 wc
.imm_data
= wqe
->wr
.imm_data
;
312 if (!ipath_get_rwqe(qp
, 0)) {
315 if (qp
->ibqp
.qp_type
== IB_QPT_UC
)
317 if (sqp
->s_rnr_retry
== 0) {
318 wc
.status
= IB_WC_RNR_RETRY_EXC_ERR
;
321 if (sqp
->s_rnr_retry_cnt
< 7)
325 ib_ipath_rnr_table
[qp
->r_min_rnr_timer
];
326 ipath_insert_rnr_queue(sqp
);
331 case IB_WR_RDMA_WRITE_WITH_IMM
:
332 if (unlikely(!(qp
->qp_access_flags
&
333 IB_ACCESS_REMOTE_WRITE
))) {
334 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
337 wc
.wc_flags
= IB_WC_WITH_IMM
;
338 wc
.imm_data
= wqe
->wr
.imm_data
;
339 if (!ipath_get_rwqe(qp
, 1))
342 case IB_WR_RDMA_WRITE
:
343 if (unlikely(!(qp
->qp_access_flags
&
344 IB_ACCESS_REMOTE_WRITE
))) {
345 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
348 if (wqe
->length
== 0)
350 if (unlikely(!ipath_rkey_ok(qp
, &qp
->r_sge
, wqe
->length
,
351 wqe
->wr
.wr
.rdma
.remote_addr
,
352 wqe
->wr
.wr
.rdma
.rkey
,
353 IB_ACCESS_REMOTE_WRITE
))) {
355 wc
.status
= IB_WC_REM_ACCESS_ERR
;
357 wc
.wr_id
= wqe
->wr
.wr_id
;
358 wc
.opcode
= ib_ipath_wc_opcode
[wqe
->wr
.opcode
];
362 wc
.src_qp
= sqp
->remote_qpn
;
364 wc
.slid
= sqp
->remote_ah_attr
.dlid
;
365 wc
.sl
= sqp
->remote_ah_attr
.sl
;
366 wc
.dlid_path_bits
= 0;
368 spin_lock_irqsave(&sqp
->s_lock
, flags
);
369 ipath_sqerror_qp(sqp
, &wc
);
370 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
375 case IB_WR_RDMA_READ
:
376 if (unlikely(!(qp
->qp_access_flags
&
377 IB_ACCESS_REMOTE_READ
))) {
378 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
381 if (unlikely(!ipath_rkey_ok(qp
, &sqp
->s_sge
, wqe
->length
,
382 wqe
->wr
.wr
.rdma
.remote_addr
,
383 wqe
->wr
.wr
.rdma
.rkey
,
384 IB_ACCESS_REMOTE_READ
)))
386 qp
->r_sge
.sge
= wqe
->sg_list
[0];
387 qp
->r_sge
.sg_list
= wqe
->sg_list
+ 1;
388 qp
->r_sge
.num_sge
= wqe
->wr
.num_sge
;
391 case IB_WR_ATOMIC_CMP_AND_SWP
:
392 case IB_WR_ATOMIC_FETCH_AND_ADD
:
393 if (unlikely(!(qp
->qp_access_flags
&
394 IB_ACCESS_REMOTE_ATOMIC
))) {
395 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
398 if (unlikely(!ipath_rkey_ok(qp
, &qp
->r_sge
, sizeof(u64
),
399 wqe
->wr
.wr
.atomic
.remote_addr
,
400 wqe
->wr
.wr
.atomic
.rkey
,
401 IB_ACCESS_REMOTE_ATOMIC
)))
403 /* Perform atomic OP and save result. */
404 maddr
= (atomic64_t
*) qp
->r_sge
.sge
.vaddr
;
405 sdata
= wqe
->wr
.wr
.atomic
.compare_add
;
406 *(u64
*) sqp
->s_sge
.sge
.vaddr
=
407 (wqe
->wr
.opcode
== IB_WR_ATOMIC_FETCH_AND_ADD
) ?
408 (u64
) atomic64_add_return(sdata
, maddr
) - sdata
:
409 (u64
) cmpxchg((u64
*) qp
->r_sge
.sge
.vaddr
,
410 sdata
, wqe
->wr
.wr
.atomic
.swap
);
417 sge
= &sqp
->s_sge
.sge
;
419 u32 len
= sqp
->s_len
;
421 if (len
> sge
->length
)
423 if (len
> sge
->sge_length
)
424 len
= sge
->sge_length
;
426 ipath_copy_sge(&qp
->r_sge
, sge
->vaddr
, len
);
429 sge
->sge_length
-= len
;
430 if (sge
->sge_length
== 0) {
431 if (--sqp
->s_sge
.num_sge
)
432 *sge
= *sqp
->s_sge
.sg_list
++;
433 } else if (sge
->length
== 0 && sge
->mr
!= NULL
) {
434 if (++sge
->n
>= IPATH_SEGSZ
) {
435 if (++sge
->m
>= sge
->mr
->mapsz
)
440 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].vaddr
;
442 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].length
;
447 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE
||
448 wqe
->wr
.opcode
== IB_WR_RDMA_READ
)
451 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
)
452 wc
.opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
454 wc
.opcode
= IB_WC_RECV
;
455 wc
.wr_id
= qp
->r_wr_id
;
456 wc
.status
= IB_WC_SUCCESS
;
458 wc
.byte_len
= wqe
->length
;
460 wc
.src_qp
= qp
->remote_qpn
;
462 wc
.slid
= qp
->remote_ah_attr
.dlid
;
463 wc
.sl
= qp
->remote_ah_attr
.sl
;
464 wc
.dlid_path_bits
= 0;
466 /* Signal completion event if the solicited bit is set. */
467 ipath_cq_enter(to_icq(qp
->ibqp
.recv_cq
), &wc
,
468 wqe
->wr
.send_flags
& IB_SEND_SOLICITED
);
471 sqp
->s_rnr_retry
= sqp
->s_rnr_retry_cnt
;
472 ipath_send_complete(sqp
, wqe
, IB_WC_SUCCESS
);
476 if (atomic_dec_and_test(&qp
->refcount
))
480 static void want_buffer(struct ipath_devdata
*dd
)
482 set_bit(IPATH_S_PIOINTBUFAVAIL
, &dd
->ipath_sendctrl
);
483 ipath_write_kreg(dd
, dd
->ipath_kregs
->kr_sendctrl
,
488 * ipath_no_bufs_available - tell the layer driver we need buffers
489 * @qp: the QP that caused the problem
490 * @dev: the device we ran out of buffers on
492 * Called when we run out of PIO buffers.
494 static void ipath_no_bufs_available(struct ipath_qp
*qp
,
495 struct ipath_ibdev
*dev
)
500 * Note that as soon as want_buffer() is called and
501 * possibly before it returns, ipath_ib_piobufavail()
502 * could be called. If we are still in the tasklet function,
503 * tasklet_hi_schedule() will not call us until the next time
504 * tasklet_hi_schedule() is called.
505 * We leave the busy flag set so that another post send doesn't
506 * try to put the same QP on the piowait list again.
508 spin_lock_irqsave(&dev
->pending_lock
, flags
);
509 list_add_tail(&qp
->piowait
, &dev
->piowait
);
510 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
511 want_buffer(dev
->dd
);
516 * ipath_make_grh - construct a GRH header
517 * @dev: a pointer to the ipath device
518 * @hdr: a pointer to the GRH header being constructed
519 * @grh: the global route address to send to
520 * @hwords: the number of 32 bit words of header being sent
521 * @nwords: the number of 32 bit words of data being sent
523 * Return the size of the header in 32 bit words.
525 u32
ipath_make_grh(struct ipath_ibdev
*dev
, struct ib_grh
*hdr
,
526 struct ib_global_route
*grh
, u32 hwords
, u32 nwords
)
528 hdr
->version_tclass_flow
=
529 cpu_to_be32((6 << 28) |
530 (grh
->traffic_class
<< 20) |
532 hdr
->paylen
= cpu_to_be16((hwords
- 2 + nwords
+ SIZE_OF_CRC
) << 2);
533 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
534 hdr
->next_hdr
= 0x1B;
535 hdr
->hop_limit
= grh
->hop_limit
;
536 /* The SGID is 32-bit aligned. */
537 hdr
->sgid
.global
.subnet_prefix
= dev
->gid_prefix
;
538 hdr
->sgid
.global
.interface_id
= dev
->dd
->ipath_guid
;
539 hdr
->dgid
= grh
->dgid
;
541 /* GRH header size in 32-bit words. */
542 return sizeof(struct ib_grh
) / sizeof(u32
);
545 void ipath_make_ruc_header(struct ipath_ibdev
*dev
, struct ipath_qp
*qp
,
546 struct ipath_other_headers
*ohdr
,
553 /* Construct the header. */
554 extra_bytes
= -qp
->s_cur_size
& 3;
555 nwords
= (qp
->s_cur_size
+ extra_bytes
) >> 2;
556 lrh0
= IPATH_LRH_BTH
;
557 if (unlikely(qp
->remote_ah_attr
.ah_flags
& IB_AH_GRH
)) {
558 qp
->s_hdrwords
+= ipath_make_grh(dev
, &qp
->s_hdr
.u
.l
.grh
,
559 &qp
->remote_ah_attr
.grh
,
560 qp
->s_hdrwords
, nwords
);
561 lrh0
= IPATH_LRH_GRH
;
563 lrh0
|= qp
->remote_ah_attr
.sl
<< 4;
564 qp
->s_hdr
.lrh
[0] = cpu_to_be16(lrh0
);
565 qp
->s_hdr
.lrh
[1] = cpu_to_be16(qp
->remote_ah_attr
.dlid
);
566 qp
->s_hdr
.lrh
[2] = cpu_to_be16(qp
->s_hdrwords
+ nwords
+ SIZE_OF_CRC
);
567 qp
->s_hdr
.lrh
[3] = cpu_to_be16(dev
->dd
->ipath_lid
);
568 bth0
|= ipath_get_pkey(dev
->dd
, qp
->s_pkey_index
);
569 bth0
|= extra_bytes
<< 20;
570 ohdr
->bth
[0] = cpu_to_be32(bth0
| (1 << 22));
571 ohdr
->bth
[1] = cpu_to_be32(qp
->remote_qpn
);
572 ohdr
->bth
[2] = cpu_to_be32(bth2
);
576 * ipath_do_send - perform a send on a QP
577 * @data: contains a pointer to the QP
579 * Process entries in the send work queue until credit or queue is
580 * exhausted. Only allow one CPU to send a packet per QP (tasklet).
581 * Otherwise, two threads could send packets out of order.
583 void ipath_do_send(unsigned long data
)
585 struct ipath_qp
*qp
= (struct ipath_qp
*)data
;
586 struct ipath_ibdev
*dev
= to_idev(qp
->ibqp
.device
);
587 int (*make_req
)(struct ipath_qp
*qp
);
589 if (test_and_set_bit(IPATH_S_BUSY
, &qp
->s_busy
))
592 if ((qp
->ibqp
.qp_type
== IB_QPT_RC
||
593 qp
->ibqp
.qp_type
== IB_QPT_UC
) &&
594 qp
->remote_ah_attr
.dlid
== dev
->dd
->ipath_lid
) {
595 ipath_ruc_loopback(qp
);
599 if (qp
->ibqp
.qp_type
== IB_QPT_RC
)
600 make_req
= ipath_make_rc_req
;
601 else if (qp
->ibqp
.qp_type
== IB_QPT_UC
)
602 make_req
= ipath_make_uc_req
;
604 make_req
= ipath_make_ud_req
;
607 /* Check for a constructed packet to be sent. */
608 if (qp
->s_hdrwords
!= 0) {
610 * If no PIO bufs are available, return. An interrupt will
611 * call ipath_ib_piobufavail() when one is available.
613 if (ipath_verbs_send(qp
, &qp
->s_hdr
, qp
->s_hdrwords
,
614 qp
->s_cur_sge
, qp
->s_cur_size
)) {
615 ipath_no_bufs_available(qp
, dev
);
618 dev
->n_unicast_xmit
++;
619 /* Record that we sent the packet and s_hdr is empty. */
626 clear_bit(IPATH_S_BUSY
, &qp
->s_busy
);
630 void ipath_send_complete(struct ipath_qp
*qp
, struct ipath_swqe
*wqe
,
631 enum ib_wc_status status
)
633 u32 last
= qp
->s_last
;
635 if (++last
== qp
->s_size
)
639 /* See ch. 11.2.4.1 and 10.7.3.1 */
640 if (!(qp
->s_flags
& IPATH_S_SIGNAL_REQ_WR
) ||
641 (wqe
->wr
.send_flags
& IB_SEND_SIGNALED
) ||
642 status
!= IB_WC_SUCCESS
) {
645 wc
.wr_id
= wqe
->wr
.wr_id
;
647 wc
.opcode
= ib_ipath_wc_opcode
[wqe
->wr
.opcode
];
649 wc
.byte_len
= wqe
->length
;
657 wc
.dlid_path_bits
= 0;
659 ipath_cq_enter(to_icq(qp
->ibqp
.send_cq
), &wc
, 0);