/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "ipath_verbs.h"
#include "ipath_kernel.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
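/* e.g. OP(SEND_FIRST) expands to IB_OPCODE_UC_SEND_FIRST */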

/**
 * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
 * @qp: a pointer to the QP
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int ipath_make_uc_req(struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	struct ipath_swqe *wqe;
	unsigned long flags;
	u32 hwords;
	u32 bth0;
	u32 len;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);

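	/*
	 * If this QP state no longer allows posting sends, either bail
	 * out or, in the error state, flush the queued work requests.
	 */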
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
		if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == qp->s_head)
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (atomic_read(&qp->s_dma_busy)) {
			qp->s_flags |= IPATH_S_WAIT_DMA;
			goto bail;
		}
		wqe = get_swqe_ptr(qp, qp->s_last);
		ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done;
	}

	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;

	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	bth0 = 1 << 22; /* Set M bit */

	/* Get the next send request. */
	wqe = get_swqe_ptr(qp, qp->s_cur);
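	/*
	 * qp->s_state remembers which packet of the current message was
	 * built last, so a multi-packet SEND or RDMA WRITE resumes in
	 * the matching case below; the default case starts a new request.
	 */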
	switch (qp->s_state) {
	default:
		if (!(ib_ipath_state_ops[qp->state] &
		    IPATH_PROCESS_NEXT_SEND_OK))
			goto bail;
		/* Check if send work queue is empty. */
		if (qp->s_cur == qp->s_head)
			goto bail;
		/* Start a new request. */
		qp->s_psn = wqe->psn = qp->s_next_psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = len = wqe->length;
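		/*
		 * A request that fits in one PMTU goes out as a single
		 * ONLY packet; otherwise emit a FIRST packet now and
		 * finish with MIDDLE/LAST packets on later calls.
		 */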
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		break;

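	/*
	 * The cases below resume a message started on an earlier call:
	 * send MIDDLE packets while more than one PMTU of payload
	 * remains, then finish with a LAST packet.
	 */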
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
	}
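	/*
	 * Record the payload for this packet and let the common RUC
	 * code build the LRH/BTH in qp->s_hdr.
	 */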
	qp->s_len -= len;
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = &qp->s_sge;
	qp->s_cur_size = len;
	ipath_make_ruc_header(to_idev(qp->ibqp.device),
			      qp, ohdr, bth0 | (qp->s_state << 24),
			      qp->s_next_psn++ & IPATH_PSN_MASK);
done:
	ret = 1;
	goto unlock;

bail:
	qp->s_flags &= ~IPATH_S_BUSY;
unlock:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

/**
 * ipath_uc_rcv - handle an incoming UC packet
 * @dev: the device the packet came in on
 * @hdr: the header of the packet
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the length of the packet
 * @qp: the QP for this packet.
 *
 * This is called from ipath_qp_rcv() to process an incoming UC packet.
 * Called at interrupt level.
 */
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	int opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ib_reth *reth;
	int header_in_data;

	/* Validate the SLID. See Ch. 9.6.1.5 */
	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
		goto done;

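	/*
	 * Locate the BTH and the PSN; where they live depends on whether
	 * the packet carries a GRH and on how the hardware split the
	 * header from the payload.
	 */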
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the core driver
		 * sets the eager header buffer size to 56 bytes, so the
		 * last 4 bytes of the BTH (the PSN) are in the data
		 * buffer.
		 */
		header_in_data = dev->dd->ipath_rcvhdrentsize == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}

	/*
	 * The opcode is in the low byte when it's in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	memset(&wc, 0, sizeof wc);

	/* Compare the PSN versus the expected PSN. */
	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
	inv:
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			dev->n_pkt_drops++;
			goto done;
		}
	}

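	/*
	 * UC never retransmits, so a PSN or opcode sequence error simply
	 * drops any partially received message; reception resumes with
	 * the next FIRST or ONLY packet.
	 */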
	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
	send_first:
		if (qp->r_flags & IPATH_R_REUSE_SGE) {
			qp->r_flags &= ~IPATH_R_REUSE_SGE;
			qp->r_sge = qp->s_rdma_read_sge;
		} else if (!ipath_get_rwqe(qp, 0)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Save the WQE so we can reuse it in case of an error. */
		qp->s_rdma_read_sge = qp->r_sge;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			qp->r_flags |= IPATH_R_REUSE_SGE;
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			qp->r_flags |= IPATH_R_REUSE_SGE;
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.ex.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.ex.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			qp->r_flags |= IPATH_R_REUSE_SGE;
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len)) {
			qp->r_flags |= IPATH_R_REUSE_SGE;
			dev->n_pkt_drops++;
			goto done;
		}
		wc.opcode = IB_WC_RECV;
	last_imm:
		ipath_copy_sge(&qp->r_sge, data, tlen);
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				cpu_to_be32(1 << 23)) != 0);
		break;

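	/*
	 * RDMA writes land directly in the memory described by the RETH;
	 * a receive WQE is consumed only when the write carries
	 * immediate data.
	 */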
	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
	rdma_first:
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *) data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
					   vaddr, rkey,
					   IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok)) {
				dev->n_pkt_drops++;
				goto done;
			}
		} else {
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (opcode == OP(RDMA_WRITE_ONLY))
			goto rdma_last;
		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			goto rdma_last_imm;
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
	rdma_last_imm:
		if (header_in_data) {
			wc.ex.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.ex.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;

		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (qp->r_flags & IPATH_R_REUSE_SGE)
			qp->r_flags &= ~IPATH_R_REUSE_SGE;
		else if (!ipath_get_rwqe(qp, 1)) {
			dev->n_pkt_drops++;
			goto done;
		}
		wc.byte_len = qp->r_len;
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
	rdma_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, tlen);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		dev->n_pkt_drops++;
		goto done;
	}
	qp->r_psn++;
	qp->r_state = opcode;
done:
	return;
}