/*
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "ipath_verbs.h"
#include "ips_common.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_UC_##x
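/* e.g. OP(SEND_FIRST) expands to IB_OPCODE_UC_SEND_FIRST, the UC
 * transport variant of the opcode. */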
static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
			       struct ib_wc *wc)
{
	if (++qp->s_last == qp->s_size)
		qp->s_last = 0;
	if (!test_bit(IPATH_S_SIGNAL_REQ_WR, &qp->s_flags) ||
	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
		wc->wr_id = wqe->wr.wr_id;
		wc->status = IB_WC_SUCCESS;
		wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
		wc->vendor_err = 0;
		wc->byte_len = wqe->length;
		wc->qp_num = qp->ibqp.qp_num;
		wc->src_qp = qp->remote_qpn;
		wc->pkey_index = 0;
		wc->slid = qp->remote_ah_attr.dlid;
		wc->sl = qp->remote_ah_attr.sl;
		wc->dlid_path_bits = 0;
		wc->port_num = 0;
		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
	}
	wqe = get_swqe_ptr(qp, qp->s_last);
}
/**
 * ipath_do_uc_send - do a send on a UC queue
 * @data: contains a pointer to the QP to send on
 *
 * Process entries in the send work queue until the queue is exhausted.
 * Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, after we drop the QP lock, two threads could send
 * packets out of order.
 * This is similar to ipath_do_rc_send() except we don't have
 * timeouts or resends.
 */
void ipath_do_uc_send(unsigned long data)
{
	struct ipath_qp *qp = (struct ipath_qp *)data;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_swqe *wqe;
	unsigned long flags;
	u16 lrh0;
	u32 hwords;
	u32 nwords;
	u32 extra_bytes;
	u32 bth0;
	u32 bth2;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	u32 len;
	struct ipath_other_headers *ohdr;
	struct ib_wc wc;
	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_flags))
		goto bail;

	if (unlikely(qp->remote_ah_attr.dlid ==
		     ipath_layer_get_lid(dev->dd))) {
		/* Pass in an uninitialized ib_wc to save stack space. */
		ipath_ruc_loopback(qp, &wc);
		clear_bit(IPATH_S_BUSY, &qp->s_flags);
		goto bail;
	}
	ohdr = &qp->s_hdr.u.oth;
	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
		ohdr = &qp->s_hdr.u.l.oth;
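	/*
	 * When a GRH is present the BTH follows the 40-byte GRH, hence
	 * the two header layouts (u.oth vs. u.l.oth).
	 */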
again:
	/* Check for a constructed packet to be sent. */
	if (qp->s_hdrwords != 0) {
		/*
		 * If no PIO bufs are available, return.
		 * An interrupt will call ipath_ib_piobufavail()
		 * when one is available.
		 */
		if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
				     (u32 *) &qp->s_hdr, qp->s_cur_size,
				     qp->s_cur_sge)) {
			ipath_no_bufs_available(qp, dev);
			goto bail;
		}
		dev->n_unicast_xmit++;
		/* Record that we sent the packet and s_hdr is empty. */
		qp->s_hdrwords = 0;
	}

	lrh0 = IPS_LRH_BTH;
	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
	hwords = 5;
	/*
	 * The lock is needed to synchronize between
	 * setting qp->s_ack_state and post_send().
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
		goto done;

	bth0 = ipath_layer_get_pkey(dev->dd, qp->s_pkey_index);
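	/*
	 * bth0 now holds the P_Key in its low 16 bits; the opcode, the
	 * SolicitedEvent bit, and the pad count are OR'd into the upper
	 * bits below.
	 */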
	/* Send a request. */
	wqe = get_swqe_ptr(qp, qp->s_last);
	switch (qp->s_state) {
	default:
		/*
		 * Signal the completion of the last send
		 * (if there is one).
		 */
		if (qp->s_last != qp->s_tail)
			complete_last_send(qp, wqe, &wc);

		/* Check if send work queue is empty. */
		if (qp->s_tail == qp->s_head)
			goto done;
		/*
		 * Start a new request.
		 */
		qp->s_psn = wqe->psn = qp->s_next_psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_len = len = wqe->length;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND)
				qp->s_state = OP(SEND_ONLY);
			else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
			break;
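		/*
		 * 1 << 23 above is the BTH SolicitedEvent bit; it asks
		 * the responder to generate a completion event.
		 */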
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->wr.wr.rdma.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
				qp->s_state = OP(RDMA_WRITE_ONLY);
			else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= 1 << 23;
			}
			break;

		default:
			goto done;
		}
		if (++qp->s_tail >= qp->s_size)
			qp->s_tail = 0;
		break;

	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND)
			qp->s_state = OP(SEND_LAST);
		else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= 1 << 23;
		break;
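	/*
	 * RDMA writes are segmented the same way as sends: FIRST carries
	 * the RETH, each MIDDLE packet carries one PMTU of payload, and
	 * LAST carries the remainder.
	 */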
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
			qp->s_state = OP(RDMA_WRITE_LAST);
		else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= 1 << 23;
		}
		break;
	}
	bth2 = qp->s_next_psn++ & IPS_PSN_MASK;
	qp->s_len -= len;
	bth0 |= qp->s_state << 24;
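	/* PSNs are 24-bit values; IPS_PSN_MASK keeps bth2 in range. */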
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Construct the header. */
	extra_bytes = (4 - len) & 3;
	nwords = (len + extra_bytes) >> 2;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		/* Header size in 32-bit words. */
		hwords += 10;
		lrh0 = IPS_LRH_GRH;
		qp->s_hdr.u.l.grh.version_tclass_flow =
			cpu_to_be32((6 << 28) |
				    (qp->remote_ah_attr.grh.traffic_class
				     << 20) |
				    qp->remote_ah_attr.grh.flow_label);
		qp->s_hdr.u.l.grh.paylen =
			cpu_to_be16(((hwords - 12) + nwords +
				     SIZE_OF_CRC) << 2);
		/* next_hdr is defined by C8-7 in ch. 8.4.1 */
		qp->s_hdr.u.l.grh.next_hdr = 0x1B;
		qp->s_hdr.u.l.grh.hop_limit =
			qp->remote_ah_attr.grh.hop_limit;
		/* The SGID is 32-bit aligned. */
		qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
			dev->gid_prefix;
		qp->s_hdr.u.l.grh.sgid.global.interface_id =
			ipath_layer_get_guid(dev->dd);
		qp->s_hdr.u.l.grh.dgid = qp->remote_ah_attr.grh.dgid;
	}
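	/* Now that the final header length is known, fill in the LRH
	 * and BTH. */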
	qp->s_hdrwords = hwords;
	qp->s_cur_sge = &qp->s_sge;
	qp->s_cur_size = len;
	lrh0 |= qp->remote_ah_attr.sl << 4;
	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
	qp->s_hdr.lrh[3] = cpu_to_be16(ipath_layer_get_lid(dev->dd));
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
	ohdr->bth[2] = cpu_to_be32(bth2);
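	/* bth[0] = opcode/flags/pad/P_Key, bth[1] = dest QPN,
	 * bth[2] = PSN. */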
	/* Check for more work to do. */
	goto again;

done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	clear_bit(IPATH_S_BUSY, &qp->s_flags);

bail:
	return;
}
/**
 * ipath_uc_rcv - handle an incoming UC packet
 * @dev: the device the packet came in on
 * @hdr: the header of the packet
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the length of the packet
 * @qp: the QP for this packet.
 *
 * This is called from ipath_qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
		  int has_grh, void *data, u32 tlen, struct ipath_qp *qp)
{
	struct ipath_other_headers *ohdr;
	int opcode;
	u32 hdrsize;
	u32 psn;
	u32 pad;
	unsigned long flags;
	struct ib_wc wc;
	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
	struct ib_reth *reth;
	int header_in_data;
	if (!has_grh) {
		ohdr = &hdr->u.oth;
		hdrsize = 8 + 12;	/* LRH + BTH */
		psn = be32_to_cpu(ohdr->bth[2]);
		header_in_data = 0;
	} else {
		ohdr = &hdr->u.l.oth;
		hdrsize = 8 + 40 + 12;	/* LRH + GRH + BTH */
		/*
		 * The header with GRH is 60 bytes and the
		 * core driver sets the eager header buffer
		 * size to 56 bytes so the last 4 bytes of
		 * the BTH header (PSN) are in the data buffer.
		 */
		header_in_data =
			ipath_layer_get_rcvhdrentsize(dev->dd) == 16;
		if (header_in_data) {
			psn = be32_to_cpu(((__be32 *) data)[0]);
			data += sizeof(__be32);
		} else
			psn = be32_to_cpu(ohdr->bth[2]);
	}
	/*
	 * The opcode is in the low byte when it's in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;

	wc.imm_data = 0;
	wc.wc_flags = 0;

	spin_lock_irqsave(&qp->r_rq.lock, flags);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
	inv:
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			dev->n_pkt_drops++;
			goto done;
		}
	}
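	/*
	 * Note that UC, unlike RC, has no NAK or retry: a PSN gap simply
	 * drops the current message, and the next FIRST or ONLY packet
	 * resynchronizes the stream.
	 */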
	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}
	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
	send_first:
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
			qp->r_sge = qp->s_rdma_sge;
		} else if (!ipath_get_rwqe(qp, 0)) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Save the WQE so we can reuse it in case of an error. */
		qp->s_rdma_sge = qp->r_sge;
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto send_last;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;
	case OP(SEND_LAST_WITH_IMMEDIATE):
	send_last_imm:
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case OP(SEND_LAST):
	send_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len)) {
			qp->r_reuse_sge = 1;
			dev->n_pkt_drops++;
			goto done;
		}
		/* XXX Need to free SGEs */
	last_imm:
		ipath_copy_sge(&qp->r_sge, data, tlen);
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.opcode = IB_WC_RECV;
		wc.vendor_err = 0;
		wc.qp_num = qp->ibqp.qp_num;
		wc.src_qp = qp->remote_qpn;
		wc.pkey_index = 0;
		wc.slid = qp->remote_ah_attr.dlid;
		wc.sl = qp->remote_ah_attr.sl;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
			       (ohdr->bth[0] &
				__constant_cpu_to_be32(1 << 23)) != 0);
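		/*
		 * The solicited bit is tested on the raw big-endian BTH
		 * word, so no byte swap is needed.
		 */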
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
	rdma_first:
		/* RETH comes after BTH */
		if (!header_in_data)
			reth = &ohdr->u.rc.reth;
		else {
			reth = (struct ib_reth *)data;
			data += sizeof(*reth);
		}
		hdrsize += sizeof(*reth);
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);

			/* Check rkey */
			if (unlikely(!ipath_rkey_ok(
					     dev, &qp->r_sge, qp->r_len,
					     vaddr, rkey,
					     IB_ACCESS_REMOTE_WRITE))) {
				dev->n_pkt_drops++;
				goto done;
			}
		} else {
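			/*
			 * Zero-length RDMA write: no buffer is needed,
			 * so clear the SGE state.
			 */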
			qp->r_sge.sg_list = NULL;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (opcode == OP(RDMA_WRITE_ONLY))
			goto rdma_last;
		else if (opcode ==
			 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			goto rdma_last_imm;
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, pmtu);
		break;
	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
	rdma_last_imm:
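		/*
		 * The write payload lands in the RDMA target buffer, not
		 * the receive WQE, so the completion generated via
		 * last_imm reports the immediate data with byte_len 0.
		 */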
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (qp->r_reuse_sge) {
			qp->r_reuse_sge = 0;
		} else if (!ipath_get_rwqe(qp, 1)) {
			dev->n_pkt_drops++;
			goto done;
		}
		if (header_in_data) {
			wc.imm_data = *(__be32 *) data;
			data += sizeof(__be32);
		} else {
			/* Immediate data comes after BTH */
			wc.imm_data = ohdr->u.imm_data;
		}
		hdrsize += 4;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.byte_len = 0;
		goto last_imm;
	case OP(RDMA_WRITE_LAST):
	rdma_last:
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/* Check for invalid length. */
		/* XXX LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4))) {
			dev->n_pkt_drops++;
			goto done;
		}
		/* Don't count the CRC. */
		tlen -= (hdrsize + pad + 4);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) {
			dev->n_pkt_drops++;
			goto done;
		}
		ipath_copy_sge(&qp->r_sge, data, tlen);
		break;
	default:
		/* Drop packet for unknown opcodes. */
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
		dev->n_pkt_drops++;
		goto bail;
	}
	qp->r_psn++;
	qp->r_state = opcode;
done:
	spin_unlock_irqrestore(&qp->r_rq.lock, flags);

bail:
	return;
}