// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 */

#include "hfi.h"
#include "verbs_txreq.h"
#include "qp.h"
/* cut down ridiculously long IB macro names */
#define OP(x) UC_OP(x)
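/*
 * For reference: UC_OP() (from hfi.h) pastes its argument onto the
 * IB_OPCODE_UC_ prefix, so OP(SEND_FIRST) below is expected to expand
 * to IB_OPCODE_UC_SEND_FIRST.
 */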
/**
 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
 * @qp: a pointer to the QP
 * @ps: the current packet state
 *
 * Assume s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rvt_swqe *wqe;
	u32 hwords;
	u32 bth0 = 0;
	u32 len;
	u32 pmtu = qp->pmtu;
	int middle = 0;

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;
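	/*
	 * Note: a NULL txreq means no send descriptor was available; the
	 * bail_no_tx path below clears RVT_S_BUSY so the request can be
	 * retried when the send engine runs again.
	 */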
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		clear_ahg(qp);
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done_free_tx;
	}
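	/*
	 * Note: RVT_S_WAIT_DMA is expected to keep the QP parked until the
	 * outstanding SDMA descriptors drain, after which this flush path
	 * runs again and can complete the work request.
	 */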
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	} else {
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;
		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
	}
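	/*
	 * Worked example: for 9B packets hwords starts at 5 dwords
	 * (8-byte LRH + 12-byte BTH = 20 bytes); for 16B packets it starts
	 * at 7 dwords (16-byte LRH + 12-byte BTH = 28 bytes). Opcode-specific
	 * headers (RETH, immediate data) are added on top below.
	 */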
	/* Get the next send request. */
	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	qp->s_wqe = NULL;
	switch (qp->s_state) {
	default:
		if (!(ib_rvt_state_ops[qp->state] &
		    RVT_PROCESS_NEXT_SEND_OK))
			goto bail;
		/* Check if send work queue is empty. */
		if (qp->s_cur == READ_ONCE(qp->s_head)) {
			clear_ahg(qp);
			goto bail;
		}
		/*
		 * Local operations are processed immediately
		 * after all prior requests have completed.
		 */
		if (wqe->wr.opcode == IB_WR_REG_MR ||
		    wqe->wr.opcode == IB_WR_LOCAL_INV) {
			int local_ops = 0;
			int err = 0;

			if (qp->s_last != qp->s_cur)
				goto bail;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
				err = rvt_invalidate_rkey(
					qp, wqe->wr.ex.invalidate_rkey);
				local_ops = 1;
			}
			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
							: IB_WC_SUCCESS);
			if (local_ops)
				atomic_dec(&qp->local_ops_pending);
			goto done_free_tx;
		}
		/*
		 * Start a new request.
		 */
		qp->s_psn = wqe->psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_sge.total_len = wqe->length;
		len = wqe->length;
		qp->s_len = len;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND) {
				qp->s_state = OP(SEND_ONLY);
			} else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;
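		/*
		 * The pattern above: a request that fits in one MTU goes out
		 * as a single ONLY packet; anything larger starts with FIRST
		 * and is continued by the MIDDLE/LAST states on later calls.
		 */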
		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->rdma_wr.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
				qp->s_state = OP(RDMA_WRITE_ONLY);
			} else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= IB_BTH_SOLICITED;
			}
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			goto bail;
		}
		break;
	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		fallthrough;
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND) {
			qp->s_state = OP(SEND_LAST);
		} else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= IB_BTH_SOLICITED;
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
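	/*
	 * Each call emits at most one MTU of payload and qp->s_len is
	 * decremented below, so the MIDDLE states repeat until the
	 * remainder fits in a single LAST packet.
	 */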
	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		fallthrough;
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
			qp->s_state = OP(RDMA_WRITE_LAST);
		} else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
		}
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
	}
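	/*
	 * BTH byte 0 carries the opcode, which is why the chosen send state
	 * is shifted into the top byte of the first BTH dword below.
	 */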
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = &qp->s_sge;
	ps->s_txreq->s_cur_size = len;
	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
			     qp->remote_qpn, mask_psn(qp->s_psn++),
			     middle, ps);
	return 1;
done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;

bail_no_tx:
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}
/**
 * hfi1_uc_rcv - handle an incoming UC packet
 * @packet: the packet structure
 *
 * This is called from qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct ib_other_headers *ohdr = packet->ohdr;
	u32 opcode = packet->opcode;
	u32 hdrsize = packet->hlen;
	u32 psn;
	u32 pad = packet->pad;
	struct ib_wc wc;
	u32 pmtu = qp->pmtu;
	struct ib_reth *reth;
	int ret;
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
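	/*
	 * Worked example: SIZE_OF_CRC is one dword, so SIZE_OF_CRC << 2 is
	 * the 4-byte ICRC. A 9B packet carries no LT byte (extra_byte == 0),
	 * making extra_bytes pad + 4; 16B packets add the LT byte as well.
	 */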
	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	process_ecn(qp, packet);

	psn = ib_bth_get_psn(ohdr);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
inv:
		if (qp->r_state == OP(SEND_FIRST) ||
		    qp->r_state == OP(SEND_MIDDLE)) {
			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
			qp->r_sge.num_sge = 0;
		} else {
			rvt_put_ss(&qp->r_sge);
		}
		qp->r_state = OP(SEND_LAST);
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			goto drop;
		}
	}
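	/*
	 * UC has no ACKs or retransmission: on a PSN mismatch the receiver
	 * simply resynchronizes to the sender's PSN, drops any partially
	 * received message, and restarts only on a FIRST or ONLY opcode.
	 */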
	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}
	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);
	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			qp->r_sge = qp->s_rdma_read_sge;
		} else {
			ret = rvt_get_rwqe(qp, false);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
			/*
			 * qp->s_rdma_read_sge will be the owner
			 * of the mr references.
			 */
			qp->s_rdma_read_sge = qp->r_sge;
		}
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto no_immediate_data;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		fallthrough;
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/*
		 * There will be no padding for 9B packets, but 16B packets
		 * will come in with some padding since we always add
		 * CRC and LT bytes, which need to be flit aligned.
		 */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto rewind;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto rewind;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
		break;
	case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
		wc.ex.imm_data = ohdr->u.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
		goto send_last;
	case OP(SEND_LAST):
no_immediate_data:
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
send_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto rewind;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto rewind;
		wc.opcode = IB_WC_RECV;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
		rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
		/*
		 * It seems that IB mandates the presence of an SL in a
		 * work completion only for the UD transport (see section
		 * 11.4.2 of IBTA Vol. 1).
		 *
		 * However, the way the SL is chosen below is consistent
		 * with the way that IB/qib works and is trying to avoid
		 * introducing incompatibilities.
		 *
		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
		 */
		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
		/* zero fields that are N/A */
		wc.vendor_err = 0;
		wc.pkey_index = 0;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
		break;
	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			goto drop;
		}
		reth = &ohdr->u.rc.reth;
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		qp->r_sge.sg_list = NULL;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto drop;
			qp->r_sge.num_sge = 1;
		} else {
			qp->r_sge.num_sge = 0;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_ONLY)) {
			goto rdma_last;
		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
			wc.ex.imm_data = ohdr->u.rc.imm_data;
			goto rdma_last_imm;
		}
		fallthrough;
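	/*
	 * RDMA_WRITE_FIRST falls through: its first payload chunk is
	 * handled by the MIDDLE code below, while the ONLY variants jump
	 * straight to the rdma_last paths above. A zero-length write skips
	 * the rkey check; the dummy SGE set up above keeps rvt_copy_sge()
	 * and rvt_put_ss() safe in that case.
	 */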
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto drop;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
		break;
	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		wc.ex.imm_data = ohdr->u.imm_data;
rdma_last_imm:
		wc.wc_flags = IB_WC_WITH_IMM;

		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			rvt_put_ss(&qp->s_rdma_read_sge);
		} else {
			ret = rvt_get_rwqe(qp, true);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
		}
		wc.byte_len = qp->r_len;
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		goto last_imm;
	case OP(RDMA_WRITE_LAST):
rdma_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		break;
	default:
		/* Drop packet for unknown opcodes. */
		goto drop;
	}
	qp->r_psn++;
	qp->r_state = opcode;
	return;

rewind:
	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
	qp->r_sge.num_sge = 0;
drop:
	ibp->rvp.n_pkt_drops++;
	return;

op_err:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}