2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following conditions are met:
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
34 #include <linux/spinlock.h>
36 #include "ipath_verbs.h"
37 #include "ipath_kernel.h"
40 * Convert the AETH RNR timeout code into the number of milliseconds.
/*
 * Table of 32 u32 values.  In this file it is indexed by
 * qp->r_min_rnr_timer in the loopback path.
 * NOTE(review): the initializer entries and the closing brace are not
 * present in this listing; restore them from the original file.
 */
42 const u32 ib_ipath_rnr_table
[32] = {
78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
81 * XXX Use a simple list for now. We might need a priority
82 * queue if we have lots of QPs waiting for RNR timeouts
83 * but that should be rare.
/*
 * Link @qp (through qp->timerwait) onto dev->rnrwait while holding
 * dev->pending_lock.
 *
 * The visible arithmetic stores s_rnr_timeout as a delta: every entry that
 * is passed during the walk has its timeout subtracted from qp's, and the
 * entry that ends up behind qp has qp's remaining timeout subtracted from
 * its own.
 *
 * NOTE(review): source lines are missing from this listing (the embedded
 * numbers skip, e.g. 85 -> 87 -> 90).  The block delimiters, the
 * declaration of `flags`, any assignment that advances `l`, and whatever
 * guards the final adjustment of nqp are not visible here.
 */
85 void ipath_insert_rnr_queue(struct ipath_qp
*qp
)
87 struct ipath_ibdev
*dev
= to_idev(qp
->ibqp
.device
);
/* All list manipulation below happens with interrupts saved/disabled. */
90 spin_lock_irqsave(&dev
->pending_lock
, flags
);
/* Empty wait list: qp is simply added right after the list head. */
91 if (list_empty(&dev
->rnrwait
))
92 list_add(&qp
->timerwait
, &dev
->rnrwait
);
/* Otherwise start at the list head; nqp is the entry following l. */
94 struct list_head
*l
= &dev
->rnrwait
;
95 struct ipath_qp
*nqp
= list_entry(l
->next
, struct ipath_qp
,
/*
 * While qp's remaining timeout is at least nqp's, consume nqp's share
 * from qp's timeout.
 */
98 while (qp
->s_rnr_timeout
>= nqp
->s_rnr_timeout
) {
99 qp
->s_rnr_timeout
-= nqp
->s_rnr_timeout
;
/* l->next pointing back at the list head means there is no next entry. */
101 if (l
->next
== &dev
->rnrwait
) {
105 nqp
= list_entry(l
->next
, struct ipath_qp
,
/*
 * The entry that will follow qp gives up qp's remaining timeout, then qp
 * is inserted immediately after l.
 */
109 nqp
->s_rnr_timeout
-= qp
->s_rnr_timeout
;
110 list_add(&qp
->timerwait
, l
);
112 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
116 * ipath_init_sge - Validate a RWQE and fill in the SGE state
/*
 * Walk the wqe->num_sge scatter/gather entries of receive request @wqe,
 * check each one with ipath_lkey_ok() for IB_ACCESS_LOCAL_WRITE, and add
 * each entry's length to *lengthp.
 *
 * The visible tail of the function fills a work completion with
 * IB_WC_LOC_PROT_ERR / IB_WC_RECV and enters it on the QP's receive CQ.
 *
 * NOTE(review): lines are missing from this listing.  The declarations of
 * i, j and wc, the statements controlled by the two `if`s in the loop, the
 * return statements and the label separating the success and error paths
 * are not visible.
 */
121 int ipath_init_sge(struct ipath_qp
*qp
, struct ipath_rwqe
*wqe
,
122 u32
*lengthp
, struct ipath_sge_state
*ss
)
/* i indexes the request's SG list; j is a second counter used to pick the output slot. */
128 for (i
= j
= 0; i
< wqe
->num_sge
; i
++) {
/* Zero-length entries are special-cased (the action taken is not visible here). */
129 if (wqe
->sg_list
[i
].length
== 0)
/*
 * Output slot: &ss->sge when j is 0, otherwise &ss->sg_list[j - 1].
 */
132 if (!ipath_lkey_ok(qp
, j
? &ss
->sg_list
[j
- 1] : &ss
->sge
,
133 &wqe
->sg_list
[i
], IB_ACCESS_LOCAL_WRITE
))
135 *lengthp
+= wqe
->sg_list
[i
].length
;
/* Completion reported for this request: local protection error on a receive. */
143 wc
.wr_id
= wqe
->wr_id
;
144 wc
.status
= IB_WC_LOC_PROT_ERR
;
145 wc
.opcode
= IB_WC_RECV
;
155 wc
.dlid_path_bits
= 0;
157 /* Signal solicited completion event. */
158 ipath_cq_enter(to_icq(qp
->ibqp
.recv_cq
), &wc
, 1);
165 * ipath_get_rwqe - copy the next RWQE into the QP's RWQE
167 * @wr_id_only: update wr_id only, not SGEs
169 * Return 0 if no RWQE is available, otherwise return 1.
171 * Can be called from interrupt level.
/*
 * Take the next receive WQE from the receive queue `rq` under rq->lock and
 * record its wr_id in qp->r_wr_id.  Unless @wr_id_only is set, the entry's
 * SGEs are also loaded through ipath_init_sge(); the do/while repeats with
 * the next entry while that call returns 0.
 *
 * In the visible code, if fewer than srq->limit entries remain, an
 * IB_EVENT_SRQ_LIMIT_REACHED event is delivered to the SRQ's event handler
 * after the lock has been dropped.
 *
 * NOTE(review): lines are missing from this listing.  The declarations of
 * rq, tail, n, ev and flags, the choice between the SRQ and the QP's own
 * receive queue, the `do` keyword, the memory barrier that the
 * "read after head" comment refers to, and all return statements are not
 * visible.
 */
173 int ipath_get_rwqe(struct ipath_qp
*qp
, int wr_id_only
)
177 struct ipath_rwq
*wq
;
178 struct ipath_srq
*srq
;
179 struct ipath_rwqe
*wqe
;
180 void (*handler
)(struct ib_event
*, void *);
/* Receive SGE state uses the QP's preallocated r_sg_list array. */
184 qp
->r_sge
.sg_list
= qp
->r_sg_list
;
/* SRQ case: remember the SRQ and its asynchronous event handler. */
187 srq
= to_isrq(qp
->ibqp
.srq
);
188 handler
= srq
->ibsrq
.event_handler
;
196 spin_lock_irqsave(&rq
->lock
, flags
);
199 /* Validate tail before using it since it is user writable. */
200 if (tail
>= rq
->size
)
/* tail == head means the queue holds no entries; the lock is released on this path. */
203 if (unlikely(tail
== wq
->head
)) {
204 spin_unlock_irqrestore(&rq
->lock
, flags
);
208 /* Make sure entry is read after head index is read. */
210 wqe
= get_rwqe_ptr(rq
, tail
);
/* Advance tail past the consumed entry, checking for the end of the ring. */
211 if (++tail
>= rq
->size
)
/* Try the next entry when SGEs are wanted and this entry failed ipath_init_sge(). */
213 } while (!wr_id_only
&& !ipath_init_sge(qp
, wqe
, &qp
->r_len
,
215 qp
->r_wr_id
= wqe
->wr_id
;
/* Mark r_wr_id as holding a valid work request id. */
219 qp
->r_wrid_valid
= 1;
224 * validate head pointer value and compute
225 * the number of remaining WQEs.
231 n
+= rq
->size
- tail
;
/* Below the SRQ limit: drop the lock first, then notify the consumer. */
234 if (n
< srq
->limit
) {
238 spin_unlock_irqrestore(&rq
->lock
, flags
);
239 ev
.device
= qp
->ibqp
.device
;
240 ev
.element
.srq
= qp
->ibqp
.srq
;
241 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
242 handler(&ev
, srq
->ibsrq
.srq_context
);
246 spin_unlock_irqrestore(&rq
->lock
, flags
);
253 * ipath_ruc_loopback - handle UC and RC loopback requests
254 * @sqp: the sending QP
256 * This is called from ipath_do_send() to
257 * forward a WQE addressed to the same HCA.
258 * Note that although we are single threaded due to the tasklet, we still
259 * have to protect against post_send(). We don't have to worry about
260 * receive interrupts since this is a connected protocol and all packets
261 * will pass through here.
/*
 * Execute the send work request at sqp->s_last directly against the QP
 * found by looking up sqp->remote_qpn in dev->qp_table.
 *
 * Visible flow:
 *   - under sqp->s_lock, check that the sender may process sends and is
 *     not waiting on an RNR timeout, then fetch the WQE at s_last;
 *   - load the sender SGE state (sqp->s_sge, sqp->s_len) from the WQE;
 *   - switch on wqe->wr.opcode to validate access rights / rkeys and set
 *     up the destination SGE state, or perform the atomic operation;
 *   - copy the payload with ipath_copy_sge() into qp->r_sge;
 *   - enter a receive completion on qp's receive CQ and complete the send
 *     with IB_WC_SUCCESS;
 *   - drop a reference on qp->refcount.
 *
 * NOTE(review): many lines are missing from this listing.  The
 * declarations of qp, wc, flags, maddr and sdata, the plain IB_WR_SEND
 * case, every goto/break/label, the loop around the copy, and most closing
 * braces are not visible, so the exact control flow between the fragments
 * below must be confirmed against the original file.
 */
263 static void ipath_ruc_loopback(struct ipath_qp
*sqp
)
265 struct ipath_ibdev
*dev
= to_idev(sqp
->ibqp
.device
);
267 struct ipath_swqe
*wqe
;
268 struct ipath_sge
*sge
;
/* Destination QP is resolved from the sender's remote QP number. */
274 qp
= ipath_lookup_qpn(&dev
->qp_table
, sqp
->remote_qpn
);
281 spin_lock_irqsave(&sqp
->s_lock
, flags
);
/*
 * Nothing is sent if the sender's state does not allow send processing or
 * an RNR timeout is still pending on it.
 */
283 if (!(ib_ipath_state_ops
[sqp
->state
] & IPATH_PROCESS_SEND_OK
) ||
284 sqp
->s_rnr_timeout
) {
285 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
289 /* Get the next send request. */
290 if (sqp
->s_last
== sqp
->s_head
) {
291 /* Send work queue is empty. */
292 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
297 * We can rely on the entry not changing without the s_lock
298 * being held until we update s_last.
300 wqe
= get_swqe_ptr(sqp
, sqp
->s_last
);
301 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
/*
 * Sender-side SGE state: the first SGE is copied by value, sg_list points
 * at the remaining entries, and s_len is the total request length.
 */
306 sqp
->s_sge
.sge
= wqe
->sg_list
[0];
307 sqp
->s_sge
.sg_list
= wqe
->sg_list
+ 1;
308 sqp
->s_sge
.num_sge
= wqe
->wr
.num_sge
;
309 sqp
->s_len
= wqe
->length
;
310 switch (wqe
->wr
.opcode
) {
/* Send with immediate: pass the immediate data along in the completion. */
311 case IB_WR_SEND_WITH_IMM
:
312 wc
.wc_flags
= IB_WC_WITH_IMM
;
313 wc
.imm_data
= wqe
->wr
.imm_data
;
/* A full receive WQE (wr_id and SGEs) is required; 0 means none was available. */
316 if (!ipath_get_rwqe(qp
, 0)) {
/* UC destinations are special-cased here (action not visible). */
319 if (qp
->ibqp
.qp_type
== IB_QPT_UC
)
/* RNR retries exhausted: report IB_WC_RNR_RETRY_EXC_ERR to the sender. */
321 if (sqp
->s_rnr_retry
== 0) {
322 wc
.status
= IB_WC_RNR_RETRY_EXC_ERR
;
/* NOTE(review): a retry count of 7 is treated differently from lower values - confirm meaning. */
325 if (sqp
->s_rnr_retry_cnt
< 7)
/* The wait time is looked up from the receiver's minimum RNR timer code. */
329 ib_ipath_rnr_table
[qp
->r_min_rnr_timer
];
330 ipath_insert_rnr_queue(sqp
);
/* RDMA write with immediate: needs remote-write permission on the destination QP. */
335 case IB_WR_RDMA_WRITE_WITH_IMM
:
336 if (unlikely(!(qp
->qp_access_flags
&
337 IB_ACCESS_REMOTE_WRITE
))) {
338 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
341 wc
.wc_flags
= IB_WC_WITH_IMM
;
342 wc
.imm_data
= wqe
->wr
.imm_data
;
/* Only the wr_id of a receive WQE is consumed for the immediate data. */
343 if (!ipath_get_rwqe(qp
, 1))
346 case IB_WR_RDMA_WRITE
:
347 if (unlikely(!(qp
->qp_access_flags
&
348 IB_ACCESS_REMOTE_WRITE
))) {
349 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
/* Zero-length writes are special-cased (action not visible). */
352 if (wqe
->length
== 0)
/* Validate the rkey/address range and set up qp->r_sge as the write target. */
354 if (unlikely(!ipath_rkey_ok(qp
, &qp
->r_sge
, wqe
->length
,
355 wqe
->wr
.wr
.rdma
.remote_addr
,
356 wqe
->wr
.wr
.rdma
.rkey
,
357 IB_ACCESS_REMOTE_WRITE
))) {
359 wc
.status
= IB_WC_REM_ACCESS_ERR
;
/*
 * Error completion for the sender: identify the failed request, then put
 * the send QP into the error state under its s_lock.
 */
361 wc
.wr_id
= wqe
->wr
.wr_id
;
362 wc
.opcode
= ib_ipath_wc_opcode
[wqe
->wr
.opcode
];
366 wc
.src_qp
= sqp
->remote_qpn
;
368 wc
.slid
= sqp
->remote_ah_attr
.dlid
;
369 wc
.sl
= sqp
->remote_ah_attr
.sl
;
370 wc
.dlid_path_bits
= 0;
372 spin_lock_irqsave(&sqp
->s_lock
, flags
);
373 ipath_sqerror_qp(sqp
, &wc
);
374 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
/* RDMA read: needs remote-read permission on the destination QP. */
379 case IB_WR_RDMA_READ
:
380 if (unlikely(!(qp
->qp_access_flags
&
381 IB_ACCESS_REMOTE_READ
))) {
382 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
/*
 * For a read the roles are swapped: the rkey-validated remote region
 * becomes the copy source (sqp->s_sge) and the request's own SG list
 * becomes the copy destination (qp->r_sge).
 */
385 if (unlikely(!ipath_rkey_ok(qp
, &sqp
->s_sge
, wqe
->length
,
386 wqe
->wr
.wr
.rdma
.remote_addr
,
387 wqe
->wr
.wr
.rdma
.rkey
,
388 IB_ACCESS_REMOTE_READ
)))
390 qp
->r_sge
.sge
= wqe
->sg_list
[0];
391 qp
->r_sge
.sg_list
= wqe
->sg_list
+ 1;
392 qp
->r_sge
.num_sge
= wqe
->wr
.num_sge
;
/* Atomics: need remote-atomic permission and a valid 8-byte target. */
395 case IB_WR_ATOMIC_CMP_AND_SWP
:
396 case IB_WR_ATOMIC_FETCH_AND_ADD
:
397 if (unlikely(!(qp
->qp_access_flags
&
398 IB_ACCESS_REMOTE_ATOMIC
))) {
399 wc
.status
= IB_WC_REM_INV_REQ_ERR
;
402 if (unlikely(!ipath_rkey_ok(qp
, &qp
->r_sge
, sizeof(u64
),
403 wqe
->wr
.wr
.atomic
.remote_addr
,
404 wqe
->wr
.wr
.atomic
.rkey
,
405 IB_ACCESS_REMOTE_ATOMIC
)))
407 /* Perform atomic OP and save result. */
408 maddr
= (atomic64_t
*) qp
->r_sge
.sge
.vaddr
;
409 sdata
= wqe
->wr
.wr
.atomic
.compare_add
;
/*
 * The value stored into the sender's buffer is, for fetch-and-add, the
 * post-add value minus the addend (i.e. the value before the add), and
 * for compare-and-swap, the value returned by cmpxchg().
 */
410 *(u64
*) sqp
->s_sge
.sge
.vaddr
=
411 (wqe
->wr
.opcode
== IB_WR_ATOMIC_FETCH_AND_ADD
) ?
412 (u64
) atomic64_add_return(sdata
, maddr
) - sdata
:
413 (u64
) cmpxchg((u64
*) qp
->r_sge
.sge
.vaddr
,
414 sdata
, wqe
->wr
.wr
.atomic
.swap
);
/* Payload copy from the current source SGE into the destination SGE state. */
421 sge
= &sqp
->s_sge
.sge
;
423 u32 len
= sqp
->s_len
;
/* Clamp the chunk to what the current SGE can supply. */
425 if (len
> sge
->length
)
427 if (len
> sge
->sge_length
)
428 len
= sge
->sge_length
;
430 ipath_copy_sge(&qp
->r_sge
, sge
->vaddr
, len
);
433 sge
->sge_length
-= len
;
/* Current SGE fully consumed: move on to the next one in the list, if any. */
434 if (sge
->sge_length
== 0) {
435 if (--sqp
->s_sge
.num_sge
)
436 *sge
= *sqp
->s_sge
.sg_list
++;
/*
 * Current segment exhausted but the SGE is not: step to the next segment
 * of the memory region, moving to the next map entry after IPATH_SEGSZ
 * segments.
 */
437 } else if (sge
->length
== 0 && sge
->mr
!= NULL
) {
438 if (++sge
->n
>= IPATH_SEGSZ
) {
439 if (++sge
->m
>= sge
->mr
->mapsz
)
444 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].vaddr
;
446 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].length
;
/* Plain RDMA write and RDMA read are special-cased before the receive completion is built. */
451 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE
||
452 wqe
->wr
.opcode
== IB_WR_RDMA_READ
)
/* Receive-side completion describing the delivered message. */
455 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
)
456 wc
.opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
458 wc
.opcode
= IB_WC_RECV
;
459 wc
.wr_id
= qp
->r_wr_id
;
460 wc
.status
= IB_WC_SUCCESS
;
462 wc
.byte_len
= wqe
->length
;
464 wc
.src_qp
= qp
->remote_qpn
;
466 wc
.slid
= qp
->remote_ah_attr
.dlid
;
467 wc
.sl
= qp
->remote_ah_attr
.sl
;
468 wc
.dlid_path_bits
= 0;
470 /* Signal completion event if the solicited bit is set. */
471 ipath_cq_enter(to_icq(qp
->ibqp
.recv_cq
), &wc
,
472 wqe
->wr
.send_flags
& IB_SEND_SOLICITED
);
/* Successful delivery: reload the RNR retry budget and complete the send. */
475 sqp
->s_rnr_retry
= sqp
->s_rnr_retry_cnt
;
476 ipath_send_complete(sqp
, wqe
, IB_WC_SUCCESS
);
/* Drop a reference on qp; the action taken when it reaches zero is not visible. */
480 if (atomic_dec_and_test(&qp
->refcount
))
/*
 * Under dd->ipath_sendctrl_lock, set INFINIPATH_S_PIOINTBUFAVAIL in the
 * cached send-control value, write the kr_sendctrl register, and then read
 * the kr_scratch register.
 *
 * NOTE(review): lines are missing from this listing; the declaration of
 * `flags` and the value argument of ipath_write_kreg() are not visible.
 * The scratch read presumably flushes the preceding register write -
 * confirm against the hardware documentation.
 */
484 static void want_buffer(struct ipath_devdata
*dd
)
488 spin_lock_irqsave(&dd
->ipath_sendctrl_lock
, flags
);
489 dd
->ipath_sendctrl
|= INFINIPATH_S_PIOINTBUFAVAIL
;
490 ipath_write_kreg(dd
, dd
->ipath_kregs
->kr_sendctrl
,
492 ipath_read_kreg64(dd
, dd
->ipath_kregs
->kr_scratch
);
493 spin_unlock_irqrestore(&dd
->ipath_sendctrl_lock
, flags
);
497 * ipath_no_bufs_available - tell the layer driver we need buffers
498 * @qp: the QP that caused the problem
499 * @dev: the device we ran out of buffers on
501 * Called when we run out of PIO buffers.
/*
 * Append @qp (through qp->piowait) to the tail of dev->piowait while
 * holding dev->pending_lock, then call want_buffer() on the underlying
 * device.
 *
 * NOTE(review): lines are missing from this listing; the declaration of
 * `flags`, the braces and the delimiters of the embedded comment are not
 * visible.
 */
503 static void ipath_no_bufs_available(struct ipath_qp
*qp
,
504 struct ipath_ibdev
*dev
)
509 * Note that as soon as want_buffer() is called and
510 * possibly before it returns, ipath_ib_piobufavail()
511 * could be called. If we are still in the tasklet function,
512 * tasklet_hi_schedule() will not call us until the next time
513 * tasklet_hi_schedule() is called.
514 * We leave the busy flag set so that another post send doesn't
515 * try to put the same QP on the piowait list again.
517 spin_lock_irqsave(&dev
->pending_lock
, flags
);
518 list_add_tail(&qp
->piowait
, &dev
->piowait
);
519 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
/* The buffer-available request is made only after pending_lock has been released. */
520 want_buffer(dev
->dd
);
525 * ipath_make_grh - construct a GRH header
526 * @dev: a pointer to the ipath device
527 * @hdr: a pointer to the GRH header being constructed
528 * @grh: the global route address to send to
529 * @hwords: the number of 32 bit words of header being sent
530 * @nwords: the number of 32 bit words of data being sent
532 * Return the size of the header in 32 bit words.
/*
 * Fill in the GRH at @hdr from the route information in @grh and the
 * device's own GID (dev->gid_prefix, dev->dd->ipath_guid), and return the
 * size of struct ib_grh expressed in 32-bit words.
 *
 * NOTE(review): lines are missing from this listing; the braces and the
 * last term OR-ed into version_tclass_flow are not visible.
 */
534 u32
ipath_make_grh(struct ipath_ibdev
*dev
, struct ib_grh
*hdr
,
535 struct ib_global_route
*grh
, u32 hwords
, u32 nwords
)
/* The constant 6 occupies the top four bits; the traffic class starts at bit 20. */
537 hdr
->version_tclass_flow
=
538 cpu_to_be32((6 << 28) |
539 (grh
->traffic_class
<< 20) |
/* Word counts are shifted left by 2 to convert 32-bit words to bytes. */
541 hdr
->paylen
= cpu_to_be16((hwords
- 2 + nwords
+ SIZE_OF_CRC
) << 2);
542 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
543 hdr
->next_hdr
= 0x1B;
544 hdr
->hop_limit
= grh
->hop_limit
;
545 /* The SGID is 32-bit aligned. */
546 hdr
->sgid
.global
.subnet_prefix
= dev
->gid_prefix
;
547 hdr
->sgid
.global
.interface_id
= dev
->dd
->ipath_guid
;
548 hdr
->dgid
= grh
->dgid
;
550 /* GRH header size in 32-bit words. */
551 return sizeof(struct ib_grh
) / sizeof(u32
);
/*
 * Build the LRH in qp->s_hdr and the three BTH words in @ohdr for the
 * packet currently described by qp->s_cur_size and qp->s_hdrwords.  When
 * the remote address has IB_AH_GRH set, a GRH is built as well and its
 * size is added to qp->s_hdrwords.
 *
 * NOTE(review): lines are missing from this listing.  The end of the
 * parameter list (bth0 and bth2 are used below but not declared in the
 * visible text), the declarations of extra_bytes, nwords and lrh0, and the
 * braces are not visible.
 */
554 void ipath_make_ruc_header(struct ipath_ibdev
*dev
, struct ipath_qp
*qp
,
555 struct ipath_other_headers
*ohdr
,
562 /* Construct the header. */
/* Number of pad bytes (0-3) needed to round s_cur_size up to a multiple of 4. */
563 extra_bytes
= -qp
->s_cur_size
& 3;
/* Padded payload length in 32-bit words. */
564 nwords
= (qp
->s_cur_size
+ extra_bytes
) >> 2;
565 lrh0
= IPATH_LRH_BTH
;
566 if (unlikely(qp
->remote_ah_attr
.ah_flags
& IB_AH_GRH
)) {
567 qp
->s_hdrwords
+= ipath_make_grh(dev
, &qp
->s_hdr
.u
.l
.grh
,
568 &qp
->remote_ah_attr
.grh
,
569 qp
->s_hdrwords
, nwords
);
570 lrh0
= IPATH_LRH_GRH
;
/* The service level is placed starting at bit 4 of the first LRH halfword. */
572 lrh0
|= qp
->remote_ah_attr
.sl
<< 4;
573 qp
->s_hdr
.lrh
[0] = cpu_to_be16(lrh0
);
574 qp
->s_hdr
.lrh
[1] = cpu_to_be16(qp
->remote_ah_attr
.dlid
);
/* Packet length field: header words + payload words + CRC. */
575 qp
->s_hdr
.lrh
[2] = cpu_to_be16(qp
->s_hdrwords
+ nwords
+ SIZE_OF_CRC
);
576 qp
->s_hdr
.lrh
[3] = cpu_to_be16(dev
->dd
->ipath_lid
);
/* BTH word 0 gains the P_Key and the pad count (at bit 20); bit 22 is always set. */
577 bth0
|= ipath_get_pkey(dev
->dd
, qp
->s_pkey_index
);
578 bth0
|= extra_bytes
<< 20;
579 ohdr
->bth
[0] = cpu_to_be32(bth0
| (1 << 22));
580 ohdr
->bth
[1] = cpu_to_be32(qp
->remote_qpn
);
581 ohdr
->bth
[2] = cpu_to_be32(bth2
);
585 * ipath_do_send - perform a send on a QP
586 * @data: contains a pointer to the QP
588 * Process entries in the send work queue until credit or queue is
589 * exhausted. Only allow one CPU to send a packet per QP (tasklet).
590 * Otherwise, two threads could send packets out of order.
/*
 * Send-processing entry point for one QP; @data carries the struct
 * ipath_qp pointer cast to unsigned long.
 *
 * Visible flow:
 *   - IPATH_S_BUSY in qp->s_busy is taken with test_and_set_bit() on entry
 *     and cleared with clear_bit() at the end;
 *   - RC/UC QPs whose destination LID equals the device's own LID are
 *     handed to ipath_ruc_loopback();
 *   - otherwise a request builder is selected by QP type (RC, UC, else UD);
 *   - a packet already constructed (qp->s_hdrwords != 0) is passed to
 *     ipath_verbs_send(); a non-zero return leads to
 *     ipath_no_bufs_available().
 *
 * NOTE(review): lines are missing from this listing; the gotos/labels, the
 * loop that invokes make_req, the reset of s_hdrwords and the braces are
 * not visible.
 */
592 void ipath_do_send(unsigned long data
)
594 struct ipath_qp
*qp
= (struct ipath_qp
*)data
;
595 struct ipath_ibdev
*dev
= to_idev(qp
->ibqp
.device
);
596 int (*make_req
)(struct ipath_qp
*qp
);
/* Already busy: another context owns send processing for this QP. */
598 if (test_and_set_bit(IPATH_S_BUSY
, &qp
->s_busy
))
/* Connected QP addressed to our own LID: take the loopback path. */
601 if ((qp
->ibqp
.qp_type
== IB_QPT_RC
||
602 qp
->ibqp
.qp_type
== IB_QPT_UC
) &&
603 qp
->remote_ah_attr
.dlid
== dev
->dd
->ipath_lid
) {
604 ipath_ruc_loopback(qp
);
/* Pick the per-transport request builder. */
608 if (qp
->ibqp
.qp_type
== IB_QPT_RC
)
609 make_req
= ipath_make_rc_req
;
610 else if (qp
->ibqp
.qp_type
== IB_QPT_UC
)
611 make_req
= ipath_make_uc_req
;
613 make_req
= ipath_make_ud_req
;
616 /* Check for a constructed packet to be sent. */
617 if (qp
->s_hdrwords
!= 0) {
619 * If no PIO bufs are available, return. An interrupt will
620 * call ipath_ib_piobufavail() when one is available.
622 if (ipath_verbs_send(qp
, &qp
->s_hdr
, qp
->s_hdrwords
,
623 qp
->s_cur_sge
, qp
->s_cur_size
)) {
624 ipath_no_bufs_available(qp
, dev
);
/* Count the transmitted packet in the device's unicast transmit counter. */
627 dev
->n_unicast_xmit
++;
628 /* Record that we sent the packet and s_hdr is empty. */
635 clear_bit(IPATH_S_BUSY
, &qp
->s_busy
);
/*
 * Complete send work request @wqe on @qp with @status.
 *
 * A completion is entered on the QP's send CQ when any of these holds:
 * the QP does not have IPATH_S_SIGNAL_REQ_WR set, the request has
 * IB_SEND_SIGNALED set, or @status is not IB_WC_SUCCESS.  Afterwards an
 * index named `last` is advanced, with an end-of-queue check against
 * qp->s_size, while holding qp->s_lock.
 *
 * NOTE(review): lines are missing from this listing and the end of the
 * function is not visible; the declarations of wc, flags and `last`, the
 * remaining wc fields and the stores that publish the new index must be
 * taken from the original file.
 */
639 void ipath_send_complete(struct ipath_qp
*qp
, struct ipath_swqe
*wqe
,
640 enum ib_wc_status status
)
645 /* See ch. 11.2.4.1 and 10.7.3.1 */
646 if (!(qp
->s_flags
& IPATH_S_SIGNAL_REQ_WR
) ||
647 (wqe
->wr
.send_flags
& IB_SEND_SIGNALED
) ||
648 status
!= IB_WC_SUCCESS
) {
/* Describe the finished request; the opcode is translated via ib_ipath_wc_opcode[]. */
651 wc
.wr_id
= wqe
->wr
.wr_id
;
653 wc
.opcode
= ib_ipath_wc_opcode
[wqe
->wr
.opcode
];
655 wc
.byte_len
= wqe
->length
;
663 wc
.dlid_path_bits
= 0;
/* Entered with the solicited argument set to 0. */
665 ipath_cq_enter(to_icq(qp
->ibqp
.send_cq
), &wc
, 0);
668 spin_lock_irqsave(&qp
->s_lock
, flags
);
/* Advance past the completed entry, checking for the end of the send queue. */
670 if (++last
>= qp
->s_size
)
673 spin_unlock_irqrestore(&qp
->s_lock
, flags
);