/*
 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
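/*
 * Note: OP(x) simply pastes on the RC prefix, e.g. OP(SEND_FIRST) expands
 * to IB_OPCODE_RC_SEND_FIRST, so the switch statements below can compare
 * against the standard IB RC opcodes without spelling out the full names.
 */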
/**
 * ipath_init_restart - initialize the qp->s_sge after a restart
 * @qp: the QP whose SGE we're restarting
 * @wqe: the work queue to initialize the QP's SGE from
 *
 * The QP s_lock should be held and interrupts disabled.
 */
static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
{
	struct ipath_ibdev *dev;
	u32 len;

	len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) *
		ib_mtu_enum_to_int(qp->path_mtu);
	qp->s_sge.sge = wqe->sg_list[0];
	qp->s_sge.sg_list = wqe->sg_list + 1;
	qp->s_sge.num_sge = wqe->wr.num_sge;
	ipath_skip_sge(&qp->s_sge, len);
	qp->s_len = wqe->length - len;
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (list_empty(&qp->timerwait))
		list_add_tail(&qp->timerwait,
			      &dev->pending[dev->pending_index]);
	spin_unlock(&dev->pending_lock);
}
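/*
 * PSNs are 24-bit quantities, so the arithmetic above masks differences
 * with IPATH_PSN_MASK and comparisons elsewhere go through ipath_cmp24().
 * A minimal sketch of the assumed comparison semantics (not the driver's
 * actual implementation, which lives in the ipath headers):
 *
 *	static int cmp24_sketch(u32 a, u32 b)
 *	{
 *		// sign-extend the 24-bit difference; > 0 means "a after b"
 *		return ((int)((a - b) << 8)) >> 8;
 *	}
 */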
/**
 * ipath_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return bth0 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held.
 */
u32 ipath_make_rc_ack(struct ipath_qp *qp,
		      struct ipath_other_headers *ohdr,
		      u32 pmtu)
{
	u32 hwords;
	u32 len;
	u32 bth0;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;

	/*
	 * Send a response.  Note that we are in the responder's
	 * side of the QP context.
	 */
	switch (qp->s_ack_state) {
	case OP(RDMA_READ_REQUEST):
		qp->s_cur_sge = &qp->s_rdma_sge;
		len = qp->s_rdma_len;
		if (len > pmtu) {
			len = pmtu;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
		} else
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
		qp->s_rdma_len -= len;
		bth0 = qp->s_ack_state << 24;
		ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_MIDDLE):
		qp->s_cur_sge = &qp->s_rdma_sge;
		len = qp->s_rdma_len;
		if (len > pmtu)
			len = pmtu;
		else {
			ohdr->u.aeth = ipath_compute_aeth(qp);
			hwords++;
			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
		}
		qp->s_rdma_len -= len;
		bth0 = qp->s_ack_state << 24;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
	case OP(RDMA_READ_RESPONSE_ONLY):
		/*
		 * We have to prevent new requests from changing
		 * the r_sge state while an ipath_verbs_send()
		 * is still in progress.
		 */
		qp->s_ack_state = OP(ACKNOWLEDGE);
		return 0;

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		qp->s_cur_sge = NULL;
		len = 0;
		/*
		 * Set the s_ack_state so the receive interrupt handler
		 * won't try to send an ACK (out of order) until this one
		 * is actually sent.
		 */
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
		bth0 = OP(ATOMIC_ACKNOWLEDGE) << 24;
		ohdr->u.at.aeth = ipath_compute_aeth(qp);
		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
		hwords += sizeof(ohdr->u.at) / 4;
		break;

	default:
		/* Send a regular ACK. */
		qp->s_cur_sge = NULL;
		len = 0;
		/*
		 * Set the s_ack_state so the receive interrupt handler
		 * won't try to send an ACK (out of order) until this one
		 * is actually sent.
		 */
		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
		bth0 = OP(ACKNOWLEDGE) << 24;
		if (qp->s_nak_state)
			ohdr->u.aeth =
				cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					    (qp->s_nak_state <<
					     IPATH_AETH_CREDIT_SHIFT));
		else
			ohdr->u.aeth = ipath_compute_aeth(qp);
		hwords++;
		break;
	}
	qp->s_hdrwords = hwords;
	qp->s_cur_size = len;

	return bth0;
}
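/*
 * Note: in each arm of the switch above the response opcode ends up in the
 * top byte of bth0 (hence the "<< 24" shifts), and the AETH or atomic ACK
 * ETH that follows the BTH is counted in hwords so the header length
 * recorded in s_hdrwords stays correct.
 */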
/**
 * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 * @bth0p: pointer to the BTH opcode word
 * @bth2p: pointer to the BTH PSN word
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note the QP s_lock must be held and interrupts disabled.
 */
int ipath_make_rc_req(struct ipath_qp *qp,
		      struct ipath_other_headers *ohdr,
		      u32 pmtu, u32 *bth0p, u32 *bth2p)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_sge_state *ss;
	struct ipath_swqe *wqe;
	u32 hwords;
	u32 len;
	u32 bth0;
	u32 bth2;
	char newreq;

	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
	    qp->s_rnr_timeout || qp->s_wait_credit)
		goto bail;

	/* Limit the number of packets sent without an ACK. */
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
		qp->s_wait_credit = 1;
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->timerwait))
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		goto bail;
	}

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 0;

	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
	switch (qp->s_state) {
	default:
		/*
		 * Resend an old request or start a new one.
		 *
		 * We keep track of the current SWQE so that
		 * we don't reset the "furthest progress" state
		 * if we need to back up.
		 */
		newreq = 0;
		if (qp->s_cur == qp->s_tail) {
			/* Check if send work queue is empty. */
			if (qp->s_tail == qp->s_head)
				goto bail;
			wqe->psn = qp->s_next_psn;
			newreq = 1;
		}
		/*
		 * Note that we have to be careful not to modify the
		 * original work request since we may need to resend
		 * it.
		 */
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = 0;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
			if (newreq && qp->s_lsn != (u32) -1)
				qp->s_lsn++;
			/* FALLTHROUGH */
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* If no credit, return. */
			if (qp->s_lsn != (u32) -1 &&
			    ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0)
				goto bail;
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			wqe->lpsn = wqe->psn;
			if (len > pmtu) {
				wqe->lpsn += (len - 1) / pmtu;
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			bth2 = 1 << 31;	/* Request ACK. */
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_READ:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			qp->s_state = OP(RDMA_READ_REQUEST);
			hwords += sizeof(ohdr->u.rc.reth) / 4;
			if (newreq) {
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				/*
				 * Adjust s_next_psn to count the
				 * expected number of responses.
				 */
				if (len > pmtu)
					qp->s_next_psn += (len - 1) / pmtu;
				wqe->lpsn = qp->s_next_psn++;
			}
			ss = NULL;
			len = 0;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP)
				qp->s_state = OP(COMPARE_SWAP);
			else
				qp->s_state = OP(FETCH_ADD);
			ohdr->u.atomic_eth.vaddr = cpu_to_be64(
				wqe->wr.wr.atomic.remote_addr);
			ohdr->u.atomic_eth.rkey = cpu_to_be32(
				wqe->wr.wr.atomic.rkey);
			ohdr->u.atomic_eth.swap_data = cpu_to_be64(
				wqe->wr.wr.atomic.swap);
			ohdr->u.atomic_eth.compare_data = cpu_to_be64(
				wqe->wr.wr.atomic.compare_add);
			hwords += sizeof(struct ib_atomic_eth) / 4;
			if (newreq) {
				if (qp->s_lsn != (u32) -1)
					qp->s_lsn++;
				wqe->lpsn = wqe->psn;
			}
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			ss = NULL;
			len = 0;
			break;

		default:
			goto bail;
		}
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = wqe->length;
		if (newreq) {
			qp->s_tail++;
			if (qp->s_tail >= qp->s_size)
				qp->s_tail = 0;
		}
		bth2 |= qp->s_psn & IPATH_PSN_MASK;
		if (wqe->wr.opcode == IB_WR_RDMA_READ)
			qp->s_psn = wqe->lpsn + 1;
		else {
			qp->s_psn++;
			if ((int)(qp->s_psn - qp->s_next_psn) > 0)
				qp->s_next_psn = qp->s_psn;
		}
		/*
		 * Put the QP on the pending list so lost ACKs will cause
		 * a retry.  More than one request can be pending so the
		 * QP may already be on the dev->pending list.
		 */
		spin_lock(&dev->pending_lock);
		if (list_empty(&qp->timerwait))
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		break;

	case OP(RDMA_READ_RESPONSE_FIRST):
		/*
		 * This case can only happen if a send is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_LAST):
		/*
		 * This case can only happen if an RDMA write is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = &qp->s_sge;
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		bth2 |= 1 << 31;	/* Request ACK. */
		qp->s_cur++;
		if (qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/*
		 * This case can only happen if an RDMA read is restarted.
		 * See ipath_restart_rc().
		 */
		ipath_init_restart(qp, wqe);
		len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
		ohdr->u.rc.reth.vaddr =
			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
		ohdr->u.rc.reth.rkey =
			cpu_to_be32(wqe->wr.wr.rdma.rkey);
		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
		qp->s_state = OP(RDMA_READ_REQUEST);
		hwords += sizeof(ohdr->u.rc.reth) / 4;
		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
		if ((int)(qp->s_psn - qp->s_next_psn) > 0)
			qp->s_next_psn = qp->s_psn;
		ss = NULL;
		len = 0;
		qp->s_cur++;
		if (qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_READ_REQUEST):
	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * We shouldn't start anything new until this request is
		 * finished.  The ACK will handle rescheduling us.  XXX The
		 * number of outstanding ones is negotiated at connection
		 * setup time (see pg. 258,289)?  XXX Also, if we support
		 * multiple outstanding requests, we need to check the WQE
		 * IB_SEND_FENCE flag and not send a new request if an RDMA
		 * read or atomic is pending.
		 */
		goto bail;
	}
	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
		bth2 |= 1 << 31;	/* Request ACK. */
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = ss;
	qp->s_cur_size = len;
	*bth0p = bth0 | (qp->s_state << 24);
	*bth2p = bth2;
	return 1;

bail:
	return 0;
}
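/*
 * On success the routine reports 1 and hands the opcode word and PSN word
 * back through bth0p/bth2p: the top byte of *bth0p is the request opcode
 * (qp->s_state << 24) and bit 31 of *bth2p is the "request ACK" bit set
 * above, with the 24-bit PSN in the low bits.
 */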
/**
 * send_rc_ack - Construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from ipath_rc_rcv() and only uses the receive
 * side QP state.
 * Note that RDMA reads are handled in the send side QP state and tasklet.
 */
static void send_rc_ack(struct ipath_qp *qp)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	u16 lrh0;
	u32 bth0;
	u32 hwords;
	struct ipath_ib_header hdr;
	struct ipath_other_headers *ohdr;

	/* Construct the header. */
	ohdr = &hdr.u.oth;
	lrh0 = IPATH_LRH_BTH;
	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
	hwords = 6;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		hwords += ipath_make_grh(dev, &hdr.u.l.grh,
					 &qp->remote_ah_attr.grh,
					 hwords, 0);
		ohdr = &hdr.u.l.oth;
		lrh0 = IPATH_LRH_GRH;
	}
	/* read pkey_index w/o lock (it's atomic) */
	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index);
	if (qp->r_nak_state)
		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
					   (qp->r_nak_state <<
					    IPATH_AETH_CREDIT_SHIFT));
	else
		ohdr->u.aeth = ipath_compute_aeth(qp);
	if (qp->r_ack_state >= OP(COMPARE_SWAP)) {
		bth0 |= OP(ATOMIC_ACKNOWLEDGE) << 24;
		ohdr->u.at.atomic_ack_eth = cpu_to_be64(qp->r_atomic_data);
		hwords += sizeof(ohdr->u.at.atomic_ack_eth) / 4;
	} else
		bth0 |= OP(ACKNOWLEDGE) << 24;
	lrh0 |= qp->remote_ah_attr.sl << 4;
	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK);

	/*
	 * If we can send the ACK, clear the ACK state.
	 */
	if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
		qp->r_ack_state = OP(ACKNOWLEDGE);
		dev->n_unicast_xmit++;
	} else {
		/*
		 * We are out of PIO buffers at the moment.
		 * Pass responsibility for sending the ACK to the
		 * send tasklet so that when a PIO buffer becomes
		 * available, the ACK is sent ahead of other outgoing
		 * packets.
		 */
		spin_lock_irq(&qp->s_lock);
		/* Don't coalesce if an RDMA read or atomic is pending. */
		if (qp->s_ack_state == OP(ACKNOWLEDGE) ||
		    qp->s_ack_state < OP(RDMA_READ_REQUEST)) {
			qp->s_ack_state = qp->r_ack_state;
			qp->s_nak_state = qp->r_nak_state;
			qp->s_ack_psn = qp->r_ack_psn;
			qp->r_ack_state = OP(ACKNOWLEDGE);
		}
		spin_unlock_irq(&qp->s_lock);

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);
	}
}
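/*
 * The ACK built above is a minimal RC packet: LRH words in hdr.lrh[0..3]
 * (link header flags/SL, destination LID, packet length, source LID),
 * BTH words in ohdr->bth[0..2] (opcode and pkey, remote QPN, PSN), plus
 * an AETH (and an atomic ACK ETH for atomic responses) carrying the MSN,
 * credit, or NAK code.
 */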
/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from ipath_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct ipath_qp *qp, u32 psn)
{
	u32 n = qp->s_last;
	struct ipath_swqe *wqe = get_swqe_ptr(qp, n);
	u32 opcode;

	qp->s_cur = n;

	/*
	 * If we are starting the request from the beginning,
	 * let the normal send code handle initialization.
	 */
	if (ipath_cmp24(psn, wqe->psn) <= 0) {
		qp->s_state = OP(SEND_LAST);
		goto done;
	}

	/* Find the work request opcode corresponding to the given PSN. */
	opcode = wqe->wr.opcode;
	for (;;) {
		int diff;

		if (++n == qp->s_size)
			n = 0;
		if (n == qp->s_tail)
			break;
		wqe = get_swqe_ptr(qp, n);
		diff = ipath_cmp24(psn, wqe->psn);
		if (diff < 0)
			break;
		qp->s_cur = n;
		/*
		 * If we are starting the request from the beginning,
		 * let the normal send code handle initialization.
		 */
		if (diff == 0) {
			qp->s_state = OP(SEND_LAST);
			goto done;
		}
		opcode = wqe->wr.opcode;
	}

	/*
	 * Set the state to restart in the middle of a request.
	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
	 * See ipath_do_rc_send().
	 */
	switch (opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
		break;

	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
		break;

	case IB_WR_RDMA_READ:
		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
		break;

	default:
		/*
		 * This case shouldn't happen since it's only
		 * one PSN per req.
		 */
		qp->s_state = OP(SEND_LAST);
	}
done:
	qp->s_psn = psn;
}
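/*
 * The RDMA_READ_RESPONSE_* values assigned above are not sent on the wire
 * from here; they are reused as "restart" markers in qp->s_state so that
 * ipath_make_rc_req() falls into its restart cases (which call
 * ipath_init_restart()) the next time the send tasklet runs.
 */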
/**
 * ipath_restart_rc - back up requester to resend the last un-ACKed request
 * @qp: the QP to restart
 * @psn: packet sequence number for the request
 * @wc: the work completion request
 *
 * The QP s_lock should be held and interrupts disabled.
 */
void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
{
	struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
	struct ipath_ibdev *dev;

	if (qp->s_retry == 0) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_RETRY_EXC_ERR;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = 0;
		wc->qp_num = qp->ibqp.qp_num;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_sqerror_qp(qp, wc);
		goto bail;
	}
	qp->s_retry--;

	/*
	 * Remove the QP from the timeout queue.
	 * Note: it may already have been removed by ipath_ib_timer().
	 */
	dev = to_idev(qp->ibqp.device);
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	if (wqe->wr.opcode == IB_WR_RDMA_READ)
		dev->n_rc_resends++;
	else
		dev->n_rc_resends += (int)qp->s_psn - (int)psn;

	reset_psn(qp, psn);
	tasklet_hi_schedule(&qp->s_task);

bail:
	return;
}
static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
{
	if (qp->s_wait_credit) {
		qp->s_wait_credit = 0;
		tasklet_hi_schedule(&qp->s_task);
	}
	qp->s_last_psn = psn;
}
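/*
 * Clearing s_wait_credit here matters because ipath_make_rc_req() stops
 * building new requests once the PSN runs IPATH_PSN_CREDIT ahead of
 * s_last_psn; advancing s_last_psn on an ACK and rescheduling the send
 * tasklet lets the requester make progress again.
 */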
/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 *
 * This is called from ipath_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held and interrupts disabled.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ib_wc wc;
	struct ipath_swqe *wqe;
	int ret = 0;
	u32 ack_psn;

	/*
	 * Remove the QP from the timeout queue (or RNR timeout queue).
	 * If ipath_ib_timer() has already removed it,
	 * it's OK since we hold the QP s_lock and ipath_restart_rc()
	 * just won't find anything to restart if we ACK everything.
	 */
	spin_lock(&dev->pending_lock);
	if (!list_empty(&qp->timerwait))
		list_del_init(&qp->timerwait);
	spin_unlock(&dev->pending_lock);

	/* Nothing is pending to ACK/NAK. */
	if (unlikely(qp->s_last == qp->s_tail))
		goto bail;

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	ack_psn = psn;
	if (aeth >> 29)
		ack_psn--;
	wqe = get_swqe_ptr(qp, qp->s_last);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
		/*
		 * If this request is an RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only an RDMA_READ_LAST or ONLY
		 * can ACK an RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
		      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
		      ipath_cmp24(wqe->psn, psn) != 0))) {
			/*
			 * The last valid PSN seen is the previous
			 * request's.
			 */
			update_last_psn(qp, wqe->psn - 1);
			/* Retry this request. */
			ipath_restart_rc(qp, wqe->psn, &wc);
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
			tasklet_hi_schedule(&qp->s_task);
		/* Post a send completion queue entry if requested. */
		if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
			wc.wr_id = wqe->wr.wr_id;
			wc.status = IB_WC_SUCCESS;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = wqe->length;
			wc.qp_num = qp->ibqp.qp_num;
			wc.src_qp = qp->remote_qpn;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
		}
		qp->s_retry = qp->s_retry_cnt;
		/*
		 * If we are completing a request which is in the process of
		 * being resent, we can stop resending it since we know the
		 * responder has already seen it.
		 */
		if (qp->s_last == qp->s_cur) {
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			wqe = get_swqe_ptr(qp, qp->s_cur);
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = wqe->psn;
		}
		if (++qp->s_last >= qp->s_size)
			qp->s_last = 0;
		wqe = get_swqe_ptr(qp, qp->s_last);
		if (qp->s_last == qp->s_tail)
			break;
	}

	switch (aeth >> 29) {
	case 0:		/* ACK */
		/* If this is a partial ACK, reset the retransmit timer. */
		if (qp->s_last != qp->s_tail) {
			spin_lock(&dev->pending_lock);
			list_add_tail(&qp->timerwait,
				      &dev->pending[dev->pending_index]);
			spin_unlock(&dev->pending_lock);
		}
		ipath_get_credit(qp, aeth);
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		ret = 1;
		goto bail;

	case 1:		/* RNR NAK */
		if (qp->s_rnr_retry == 0) {
			if (qp->s_last == qp->s_tail)
				goto bail;

			wc.status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;
		if (qp->s_last == qp->s_tail)
			goto bail;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		dev->n_rc_resends += (int)qp->s_psn - (int)psn;

		reset_psn(qp, psn);

		qp->s_rnr_timeout =
			ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
					   IPATH_AETH_CREDIT_MASK];
		ipath_insert_rnr_queue(qp);
		goto bail;

	case 3:		/* NAK */
		/* The last valid PSN seen is the previous request's. */
		if (qp->s_last != qp->s_tail)
			update_last_psn(qp, wqe->psn - 1);
		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
			IPATH_AETH_CREDIT_MASK) {
		case 0:	/* PSN sequence error */
			/*
			 * Back up to the responder's expected PSN.  XXX
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			if (qp->s_last == qp->s_tail)
				break;

			if (ipath_cmp24(psn, wqe->psn) < 0)
				break;

			/* Retry the request. */
			ipath_restart_rc(qp, psn, &wc);
			break;

		case 1:	/* Invalid Request */
			wc.status = IB_WC_REM_INV_REQ_ERR;
			goto class_b;

		case 2:	/* Remote Access Error */
			wc.status = IB_WC_REM_ACCESS_ERR;
			goto class_b;

		case 3:	/* Remote Operation Error */
			wc.status = IB_WC_REM_OP_ERR;
		class_b:
			wc.wr_id = wqe->wr.wr_id;
			wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
			wc.vendor_err = 0;
			wc.byte_len = 0;
			wc.qp_num = qp->ibqp.qp_num;
			wc.src_qp = qp->remote_qpn;
			wc.pkey_index = 0;
			wc.slid = qp->remote_ah_attr.dlid;
			wc.sl = qp->remote_ah_attr.sl;
			wc.dlid_path_bits = 0;
			wc.port_num = 0;
			ipath_sqerror_qp(qp, &wc);
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:		/* 2: reserved */
	reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}
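/*
 * AETH decoding used above, as assumed from the masks and shifts in this
 * file: bits 31:29 select ACK (0), RNR NAK (1), or NAK (3); the field at
 * IPATH_AETH_CREDIT_SHIFT masked by IPATH_AETH_CREDIT_MASK carries the
 * credit count (for ACKs), the RNR timer index, or the NAK error code;
 * the low 24 bits (IPATH_MSN_MASK) carry the message sequence number.
 */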
/**
 * ipath_rc_rcv_resp - process an incoming RC response packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @hdrsize: the header length
 * @pmtu: the path MTU
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 */
static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data, u32 tlen,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn, u32 hdrsize, u32 pmtu,
				     int header_in_data)
{
	unsigned long flags;
	struct ib_wc wc;
	int diff;
	u32 pad;
	u32 aeth;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Ignore invalid responses. */
	if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = ipath_cmp24(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			if (!header_in_data)
				aeth = be32_to_cpu(ohdr->u.aeth);
			else {
				aeth = be32_to_cpu(((__be32 *) data)[0]);
				data += sizeof(__be32);
			}
			if ((aeth >> 29) == 0)
				ipath_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
			*(u64 *) qp->s_sge.sge.vaddr = *(u64 *) data;
		if (!do_rc_ack(qp, aeth, psn, opcode) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		hdrsize += 4;
		/*
		 * do_rc_ack() has already checked the PSN so skip
		 * the sequence check.
		 */
		goto rdma_read;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			if (qp->s_last != qp->s_tail)
				ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
	rdma_read:
		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
			goto ack_done;
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto ack_done;
		if (unlikely(pmtu >= qp->s_len))
			goto ack_done;
		/* We got a response so update the timeout. */
		if (unlikely(qp->s_last == qp->s_tail ||
			     get_swqe_ptr(qp, qp->s_last)->wr.opcode !=
			     IB_WR_RDMA_READ))
			goto ack_done;
		spin_lock(&dev->pending_lock);
		if (qp->s_rnr_timeout == 0 && !list_empty(&qp->timerwait))
			list_move_tail(&qp->timerwait,
				       &dev->pending[dev->pending_index]);
		spin_unlock(&dev->pending_lock);
		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.
		 * XXX Yet another place that affects relaxed RDMA order
		 * since we don't want s_sge modified.
		 */
		qp->s_len -= pmtu;
		update_last_psn(qp, psn);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ipath_copy_sge(&qp->s_sge, data, pmtu);
		goto bail;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
			if (qp->s_last != qp->s_tail)
				ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
			goto ack_done;
		}
		/* FALLTHROUGH */
	case OP(RDMA_READ_RESPONSE_ONLY):
		if (unlikely(qp->s_state != OP(RDMA_READ_REQUEST)))
			goto ack_done;
		/*
		 * Get the number of bytes the message was padded by.
		 */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + pad + 8))) {
			/* XXX Need to generate an error CQ entry. */
			goto ack_done;
		}
		tlen -= hdrsize + pad + 8;
		if (unlikely(tlen != qp->s_len)) {
			/* XXX Need to generate an error CQ entry. */
			goto ack_done;
		}
		if (!header_in_data)
			aeth = be32_to_cpu(ohdr->u.aeth);
		else {
			aeth = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		}
		ipath_copy_sge(&qp->s_sge, data, tlen);
		if (do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST))) {
			/*
			 * Change the state so we continue
			 * processing new requests and wake up the
			 * tasklet if there are posted sends.
			 */
			qp->s_state = OP(SEND_LAST);
			if (qp->s_tail != qp->s_head)
				tasklet_hi_schedule(&qp->s_task);
		}
		goto ack_done;
	}

ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}
/**
 * ipath_rc_rcv_error - process an incoming duplicate or error RC packet
 * @dev: the device this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @diff: the difference between the PSN and the expected PSN
 * @header_in_data: true if part of the header data is in the data buffer
 *
 * This is called from ipath_rc_rcv() to process an unexpected
 * incoming RC packet for the given QP.
 * Called at interrupt level.
 * Return 1 if no more processing is needed; otherwise return 0 to
 * schedule a response to be sent and the s_lock unlocked.
 */
static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
				     struct ipath_other_headers *ohdr,
				     void *data,
				     struct ipath_qp *qp,
				     u32 opcode,
				     u32 psn,
				     int diff,
				     int header_in_data)
{
	struct ib_reth *reth;

	if (diff > 0) {
		/*
		 * Packet sequence error.
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if an RDMA read, atomic, or
		 * NAK is pending though.
		 */
		if (qp->s_ack_state != OP(ACKNOWLEDGE) ||
		    qp->r_nak_state != 0)
			goto done;
		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
			qp->r_ack_state = OP(SEND_ONLY);
			qp->r_nak_state = IB_NAK_PSN_ERROR;
			/* Use the expected PSN. */
			qp->r_ack_psn = qp->r_psn;
		}
		goto send_ack;
	}

	/*
	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
	 * write or atomic op.  Don't NAK errors, just silently drop
	 * the duplicate request.  Note that r_sge, r_len, and
	 * r_rcv_len may be in use so don't modify them.
	 *
	 * We are supposed to ACK the earliest duplicate PSN but we
	 * can coalesce an outstanding duplicate ACK.  We have to
	 * send the earliest so that RDMA reads can be restarted at
	 * the requester's expected PSN.
	 */
	if (opcode == OP(RDMA_READ_REQUEST)) {
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		/*
		 * If we receive a duplicate RDMA request, it means the
		 * requester saw a sequence error and needs to restart
		 * from an earlier point.  We can abort the current
		 * RDMA read send in that case.
		 */
		spin_lock_irq(&qp->s_lock);
		if (qp->s_ack_state != OP(ACKNOWLEDGE) &&
		    (qp->s_hdrwords || ipath_cmp24(psn, qp->s_ack_psn) >= 0)) {
			/*
			 * We are already sending earlier requested data.
			 * Don't abort it to send later out of sequence data.
			 */
			spin_unlock_irq(&qp->s_lock);
			goto done;
		}
		qp->s_rdma_len = be32_to_cpu(reth->length);
		if (qp->s_rdma_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/*
			 * Address range must be a subset of the original
			 * request and start on pmtu boundaries.
			 */
			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
					   qp->s_rdma_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok)) {
				spin_unlock_irq(&qp->s_lock);
				goto done;
			}
		} else {
			qp->s_rdma_sge.sg_list = NULL;
			qp->s_rdma_sge.num_sge = 0;
			qp->s_rdma_sge.sge.mr = NULL;
			qp->s_rdma_sge.sge.vaddr = NULL;
			qp->s_rdma_sge.sge.length = 0;
			qp->s_rdma_sge.sge.sge_length = 0;
		}
		qp->s_ack_state = opcode;
		qp->s_ack_psn = psn;
		spin_unlock_irq(&qp->s_lock);
		tasklet_hi_schedule(&qp->s_task);
		goto done;
	}

	/*
	 * A pending RDMA read will ACK anything before it so
	 * ignore earlier duplicate requests.
	 */
	if (qp->s_ack_state != OP(ACKNOWLEDGE))
		goto done;

	/*
	 * If an ACK is pending, don't replace the pending ACK
	 * with an earlier one since the later one will ACK the earlier.
	 * Also, if we already have a pending atomic, send it.
	 */
	if (qp->r_ack_state != OP(ACKNOWLEDGE) &&
	    (ipath_cmp24(psn, qp->r_ack_psn) <= 0 ||
	     qp->r_ack_state >= OP(COMPARE_SWAP)))
		goto send_ack;
	switch (opcode) {
	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD):
		/*
		 * Check for the PSN of the last atomic operation
		 * performed and resend the result if found.
		 */
		if ((psn & IPATH_PSN_MASK) != qp->r_atomic_psn)
			goto done;
		break;
	}
	qp->r_ack_state = opcode;
	qp->r_nak_state = 0;
	qp->r_ack_psn = psn;
done:
	return 1;

send_ack:
	return 0;
}
static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
{
	spin_lock_irq(&qp->s_lock);
	qp->state = IB_QPS_ERR;
	ipath_error_qp(qp, err);
	spin_unlock_irq(&qp->s_lock);
}
/**
 * ipath_rc_rcv - process an incoming RC packet
 * @dev: the device this packet came in on
 * @hdr: the header of this packet
 * @has_grh: true if the header has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 *
 * This is called from ipath_qp_rcv() to process an incoming RC packet
 * for the given QP.
 * Called at interrupt level.
 */
void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	u32 opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	int diff;
	struct ib_reth *reth;
	int header_in_data;

	/* Validate the SLID. See Ch. 9.6.1.5 */
	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
		goto done;

	/* Check for GRH */
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the core driver sets
		 * the eager header buffer size to 56 bytes so the last 4
		 * bytes of the BTH header (PSN) is in the data buffer.
		 */
		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}

	/*
	 * Process responses (ACKs) before anything else.  Note that the
	 * packet sequence number will be for something in the send work
	 * queue rather than the expected receive packet sequence number.
	 * In other words, this QP is the requester.
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
		ipath_rc_rcv_resp(dev, ohdr, data, tlen, qp, opcode, psn,
				  hdrsize, pmtu, header_in_data);
		goto done;
	}

	/* Compute 24 bits worth of difference. */
	diff = ipath_cmp24(psn, qp->r_psn);
	if (unlikely(diff)) {
		if (ipath_rc_rcv_error(dev, ohdr, data, qp, opcode,
				       psn, diff, header_in_data))
			goto done;
		goto send_ack;
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
	nack_inv:
		/*
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if an RDMA read, atomic, or NAK
		 * is pending though.
		 */
		if (qp->r_ack_state >= OP(COMPARE_SWAP))
			goto send_ack;
		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
		qp->r_ack_state = OP(SEND_ONLY);
		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
		qp->r_ack_psn = qp->r_psn;
		goto send_ack;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto nack_inv;

	default:
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			goto nack_inv;
		/*
		 * Note that it is up to the requester to not send a new
		 * RDMA read or atomic operation before receiving an ACK
		 * for the previous operation.
		 */
		break;
	}

	wc.imm_data = 0;
	wc.wc_flags = 0;

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
		if (!ipath_get_rwqe(qp, 0)) {
		rnr_nak:
			/*
			 * An RNR NAK will ACK earlier sends and RDMA writes.
			 * Don't queue the NAK if an RDMA read or atomic
			 * is pending though.
			 */
			if (qp->r_ack_state >= OP(COMPARE_SWAP))
				goto send_ack;
			qp->r_ack_state = OP(SEND_ONLY);
			qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
			qp->r_ack_psn = qp->r_psn;
			goto send_ack;
		}
		qp->r_rcv_len = 0;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
	case OP(RDMA_WRITE_MIDDLE):
	send_middle:
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto nack_inv;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		/* consume RWQE */
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
		if (!ipath_get_rwqe(qp, 0))
			goto rnr_nak;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		/* FALLTHROUGH */
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	case OP(RDMA_WRITE_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto nack_inv;
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto nack_inv;
		ipath_copy_sge(&qp->r_sge, data, tlen);
		qp->r_msn++;
		if (!qp->r_wrid_valid)
			break;
		qp->r_wrid_valid = 0;
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp_num = qp->ibqp.qp_num;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
		/* consume RWQE */
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &qp->r_sge,
					   qp->r_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto nack_acc;
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE)))
			goto nack_acc;
		if (opcode == OP(RDMA_WRITE_FIRST))
			goto send_middle;
		else if (opcode == OP(RDMA_WRITE_ONLY))
			goto send_last;
		if (!ipath_get_rwqe(qp, 1))
			goto rnr_nak;
		goto send_last_imm;

	case OP(RDMA_READ_REQUEST):
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_READ)))
			goto nack_acc;
		spin_lock_irq(&qp->s_lock);
		qp->s_rdma_len = be32_to_cpu(reth->length);
		if (qp->s_rdma_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey & NAK */
			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
					   qp->s_rdma_len, vaddr, rkey,
					   IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok)) {
				spin_unlock_irq(&qp->s_lock);
				goto nack_acc;
			}
			/*
			 * Update the next expected PSN.  We add 1 later
			 * below, so only add the remainder here.
			 */
			if (qp->s_rdma_len > pmtu)
				qp->r_psn += (qp->s_rdma_len - 1) / pmtu;
		} else {
			qp->s_rdma_sge.sg_list = NULL;
			qp->s_rdma_sge.num_sge = 0;
			qp->s_rdma_sge.sge.mr = NULL;
			qp->s_rdma_sge.sge.vaddr = NULL;
			qp->s_rdma_sge.sge.length = 0;
			qp->s_rdma_sge.sge.sge_length = 0;
		}
		/*
		 * We need to increment the MSN here instead of when we
		 * finish sending the result since a duplicate request would
		 * increment it more than once.
		 */
		qp->r_msn++;

		qp->s_ack_state = opcode;
		qp->s_ack_psn = psn;
		spin_unlock_irq(&qp->s_lock);

		qp->r_psn++;
		qp->r_state = opcode;
		qp->r_nak_state = 0;

		/* Call ipath_do_rc_send() in another thread. */
		tasklet_hi_schedule(&qp->s_task);

		goto done;

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		struct ib_atomic_eth *ateth;
		u64 vaddr;
		u64 sdata;
		u32 rkey;

		if (!header_in_data)
			ateth = &ohdr->u.atomic_eth;
		else {
			ateth = (struct ib_atomic_eth *)data;
			data += sizeof(*ateth);
		}
		vaddr = be64_to_cpu(ateth->vaddr);
		if (unlikely(vaddr & (sizeof(u64) - 1)))
			goto nack_inv;
		rkey = be32_to_cpu(ateth->rkey);
		/* Check rkey & NAK */
		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
					    sizeof(u64), vaddr, rkey,
					    IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_ATOMIC)))
			goto nack_acc;
		/* Perform atomic OP and save result. */
		sdata = be64_to_cpu(ateth->swap_data);
		spin_lock_irq(&dev->pending_lock);
		qp->r_atomic_data = *(u64 *) qp->r_sge.sge.vaddr;
		if (opcode == OP(FETCH_ADD))
			*(u64 *) qp->r_sge.sge.vaddr =
				qp->r_atomic_data + sdata;
		else if (qp->r_atomic_data ==
			 be64_to_cpu(ateth->compare_data))
			*(u64 *) qp->r_sge.sge.vaddr = sdata;
		spin_unlock_irq(&dev->pending_lock);
		qp->r_msn++;
		qp->r_atomic_psn = psn & IPATH_PSN_MASK;
		psn |= 1 << 31;
		break;
	}

	default:
		/* Drop packet for unknown opcodes. */
		goto done;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	qp->r_nak_state = 0;
	/* Send an ACK if requested or required. */
	if (psn & (1 << 31)) {
		/*
		 * Coalesce ACKs unless there is an RDMA READ or
		 * ATOMIC pending.
		 */
		if (qp->r_ack_state < OP(COMPARE_SWAP)) {
			qp->r_ack_state = opcode;
			qp->r_ack_psn = psn;
		}
		goto send_ack;
	}
	goto done;

nack_acc:
	/*
	 * A NAK will ACK earlier sends and RDMA writes.
	 * Don't queue the NAK if an RDMA read, atomic, or NAK
	 * is pending though.
	 */
	if (qp->r_ack_state < OP(COMPARE_SWAP)) {
		ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
		qp->r_ack_state = OP(RDMA_WRITE_ONLY);
		qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
		qp->r_ack_psn = qp->r_psn;
	}
send_ack:
	/* Send ACK right away unless the send tasklet has a pending ACK. */
	if (qp->s_ack_state == OP(ACKNOWLEDGE))
		send_rc_ack(qp);

done:
	return;
}