/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "hfi.h"
#include "verbs_txreq.h"
#include "qp.h"

/* cut down ridiculously long IB macro names */
#define OP(x) UC_OP(x)

/**
 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
 * @qp: a pointer to the QP
 * @ps: the current packet state
 *
 * Assume s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rvt_swqe *wqe;
	u32 hwords;
	u32 bth0 = 0;
	u32 len;
	u32 pmtu = qp->pmtu;
	int middle = 0;

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;
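
	/*
	 * If this QP is not allowed to send, either flush the queued
	 * work request (error state) or give up.  A flush has to wait
	 * until any SDMA descriptors still in flight for this QP have
	 * drained.
	 */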
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		clear_ahg(qp);
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done_free_tx;
	}
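
	/*
	 * Select the outgoing header layout.  A 9B (IB) LRH+BTH is
	 * five dwords; a 16B (OPA extended) LRH+BTH is seven.  The
	 * BTH also moves when a GRH is carried: always for 9B, only
	 * for multicast DLIDs in the 16B format.
	 */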
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	} else {
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;
		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
	}

	/* Get the next send request. */
	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	qp->s_wqe = NULL;
	switch (qp->s_state) {
	default:
		if (!(ib_rvt_state_ops[qp->state] &
		    RVT_PROCESS_NEXT_SEND_OK))
			goto bail;
		/* Check if send work queue is empty. */
		if (qp->s_cur == READ_ONCE(qp->s_head)) {
			clear_ahg(qp);
			goto bail;
		}
		/*
		 * Local operations are processed immediately
		 * after all prior requests have completed.
		 */
		if (wqe->wr.opcode == IB_WR_REG_MR ||
		    wqe->wr.opcode == IB_WR_LOCAL_INV) {
			int local_ops = 0;
			int err = 0;

			if (qp->s_last != qp->s_cur)
				goto bail;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
				err = rvt_invalidate_rkey(
					qp, wqe->wr.ex.invalidate_rkey);
				local_ops = 1;
			}
			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
							: IB_WC_SUCCESS);
			if (local_ops)
				atomic_dec(&qp->local_ops_pending);
			goto done_free_tx;
		}
		/*
		 * Start a new request.
		 */
		qp->s_psn = wqe->psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_sge.total_len = wqe->length;
		len = wqe->length;
		qp->s_len = len;
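
		/*
		 * A message longer than one PMTU goes out as
		 * FIRST/MIDDLE.../LAST fragments; qp->s_sge and
		 * qp->s_len carry the remaining payload across
		 * successive calls into this function.
		 */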
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND) {
				qp->s_state = OP(SEND_ONLY);
			} else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->rdma_wr.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
				qp->s_state = OP(RDMA_WRITE_ONLY);
			} else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= IB_BTH_SOLICITED;
			}
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		break;
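
	/*
	 * The cases below resume a message begun on an earlier call:
	 * qp->s_state records the last opcode built, so FIRST falls
	 * through to MIDDLE, which decides whether this packet is
	 * another MIDDLE or the LAST of the message.
	 */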
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND) {
			qp->s_state = OP(SEND_LAST);
		} else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= IB_BTH_SOLICITED;
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
			qp->s_state = OP(RDMA_WRITE_LAST);
		} else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
		}
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = &qp->s_sge;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
			     qp->remote_qpn, mask_psn(qp->s_psn++),
			     middle, ps);
	return 1;

done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}

/**
 * hfi1_uc_rcv - handle an incoming UC packet
 * @packet: the packet structure
 *
 * This is called from qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct ib_other_headers *ohdr = packet->ohdr;
	u32 opcode = packet->opcode;
	u32 hdrsize = packet->hlen;
	u32 psn;
	u32 pad = packet->pad;
	struct ib_wc wc;
	u32 pmtu = qp->pmtu;
	struct ib_reth *reth;
	int ret;
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	process_ecn(qp, packet);
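
	/*
	 * UC has no acknowledgements, so a PSN mismatch cannot be
	 * repaired by retransmission.  Instead, the receiver drops
	 * whatever message is in progress and resynchronizes its
	 * expected PSN to the incoming one.
	 */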
	psn = ib_bth_get_psn(ohdr);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
inv:
		if (qp->r_state == OP(SEND_FIRST) ||
		    qp->r_state == OP(SEND_MIDDLE)) {
			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
			qp->r_sge.num_sge = 0;
		} else {
			rvt_put_ss(&qp->r_sge);
		}
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			goto drop;
		}
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			qp->r_sge = qp->s_rdma_read_sge;
		} else {
			ret = rvt_get_rwqe(qp, false);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
			/*
			 * qp->s_rdma_read_sge will be the owner
			 * of the mr references.
			 */
			qp->s_rdma_read_sge = qp->r_sge;
		}
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto no_immediate_data;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		/* FALLTHROUGH */
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/*
		 * There will be no padding for 9B packets but 16B packets
		 * will come in with some padding since we always add
		 * CRC and LT bytes which will need to be flit aligned.
		 */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto rewind;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto rewind;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
		break;
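
	/*
	 * A LAST or ONLY packet completes a receive: the code below
	 * builds the work completion, including any immediate data,
	 * and posts it to the completion queue.
	 */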
	case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
		wc.ex.imm_data = ohdr->u.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
		goto send_last;
	case OP(SEND_LAST):
no_immediate_data:
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
send_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto rewind;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto rewind;
		wc.opcode = IB_WC_RECV;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
		rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
		/*
		 * It seems that IB mandates the presence of an SL in a
		 * work completion only for the UD transport (see section
		 * 11.4.2 of IBTA Vol. 1).
		 *
		 * However, the way the SL is chosen below is consistent
		 * with the way that IB/qib works and is trying to avoid
		 * introducing incompatibilities.
		 *
		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
		 */
		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
		/* zero fields that are N/A */
		wc.vendor_err = 0;
		wc.pkey_index = 0;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
		break;
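
	/*
	 * RDMA WRITE path: the RETH carries the remote vaddr, rkey,
	 * and total length; a nonzero length must pass rvt_rkey_ok()
	 * before any payload is placed.  Only the WITH_IMMEDIATE
	 * variants consume an RWQE and generate a completion.
	 */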
	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			goto drop;
		}
		reth = &ohdr->u.rc.reth;
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		qp->r_sge.sg_list = NULL;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto drop;
			qp->r_sge.num_sge = 1;
		} else {
			qp->r_sge.num_sge = 0;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_ONLY)) {
			goto rdma_last;
		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
			wc.ex.imm_data = ohdr->u.rc.imm_data;
			goto rdma_last_imm;
		}
		/* FALLTHROUGH */
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto drop;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		wc.ex.imm_data = ohdr->u.imm_data;
rdma_last_imm:
		wc.wc_flags = IB_WC_WITH_IMM;

		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			rvt_put_ss(&qp->s_rdma_read_sge);
		} else {
			ret = rvt_get_rwqe(qp, true);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
		}
		wc.byte_len = qp->r_len;
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
rdma_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		goto drop;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	return;

rewind:
	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
	qp->r_sge.num_sge = 0;
drop:
	ibp->rvp.n_pkt_drops++;
	return;

op_err:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}