/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/spinlock.h>

#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"
/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				  NULL, &wqe->sg_list[i],
				  IB_ACCESS_LOCAL_WRITE);
		if (unlikely(ret <= 0))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}
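/*
 * Layout sketch (derived from init_sge() above, not a separate API): the
 * first validated SGE lands in ss->sge and every subsequent one in
 * ss->sg_list[], so entry j is addressed as
 *
 *	struct rvt_sge *dst = j ? &ss->sg_list[j - 1] : &ss->sge;
 *
 * which is why both the fill loop and the bad_lkey unwind walk the state
 * with the same ternary expression.
 */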
/**
 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct rvt_rq *rq;
	struct rvt_rwq *wq;
	struct rvt_srq *srq;
	struct rvt_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (qp->ibqp.srq) {
		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	if (unlikely(tail == wq->head)) {
		ret = 0;
		goto unlock;
	}
	/* Make sure entry is read after head index is read. */
	smp_rmb();
	wqe = rvt_get_rwqe_ptr(rq, tail);
	/*
	 * Even though we update the tail index in memory, the verbs
	 * consumer is not supposed to post more entries until a
	 * completion is generated.
	 */
	if (++tail >= rq->size)
		tail = 0;
	wq->tail = tail;
	if (!wr_id_only && !init_sge(qp, wqe)) {
		ret = -1;
		goto unlock;
	}
	qp->r_wr_id = wqe->wr_id;

	ret = 1;
	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * Validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}
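/*
 * Typical caller pattern for the tri-state return value (this mirrors
 * ruc_loopback() later in this file): a negative return is a local error,
 * zero means no receive WQE has been posted yet, and one means qp->r_wr_id
 * (and qp->r_sge unless wr_id_only was set) are now valid.
 *
 *	ret = hfi1_rvt_get_rwqe(qp, 0);
 *	if (ret < 0)
 *		goto op_err;
 *	if (!ret)
 *		goto rnr_nak;
 */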
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}
/*
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
	__be64 guid;
	unsigned long flags;
	struct rvt_qp *qp = packet->qp;
	u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	u32 dlid = packet->dlid;
	u32 slid = packet->slid;
	u32 sl = packet->sl;
	bool migrated = packet->migrated;
	u16 pkey = packet->pkey;

	if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
		if (!packet->grh) {
			if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			     IB_AH_GRH) &&
			    (packet->etype != RHF_RCV_TYPE_BYPASS))
				return 1;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			      IB_AH_GRH))
				return 1;
			grh = rdma_ah_read_grh(&qp->alt_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
				    guid))
				return 1;
			if (!gid_ok(
				&packet->grh->sgid,
				grh->dgid.global.subnet_prefix,
				grh->dgid.global.interface_id))
				return 1;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
					    sc5, slid))) {
			hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
				      slid, dlid);
			return 1;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
		    ppd_from_ibp(ibp)->port !=
			rdma_ah_get_port_num(&qp->alt_ah_attr))
			return 1;
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!packet->grh) {
			if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			     IB_AH_GRH) &&
			    (packet->etype != RHF_RCV_TYPE_BYPASS))
				return 1;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			      IB_AH_GRH))
				return 1;
			grh = rdma_ah_read_grh(&qp->remote_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
				    guid))
				return 1;
			if (!gid_ok(
				&packet->grh->sgid,
				grh->dgid.global.subnet_prefix,
				grh->dgid.global.interface_id))
				return 1;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
					    sc5, slid))) {
			hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
				      slid, dlid);
			return 1;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			return 1;
		if (qp->s_mig_state == IB_MIG_REARM && !migrated)
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;
}
/**
 * ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from hfi1_do_send() to
 * forward a WQE addressed to the same HFI.
 * Note that although we are single threaded due to the send engine, we still
 * have to protect against post_send().  We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ruc_loopback(struct rvt_qp *sqp)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct rvt_qp *qp;
	struct rvt_swqe *wqe;
	struct rvt_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;
	bool release;
	int ret;
	bool copy_last = false;
	int local_ops = 0;

	rcu_read_lock();

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= RVT_S_BUSY;

again:
	if (sqp->s_last == READ_ONCE(sqp->s_head))
		goto clr_busy;
	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
		ibp->rvp.n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof(wc));
	send_status = IB_WC_SUCCESS;

	release = true;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_REG_MR:
		goto send_comp;

	case IB_WR_LOCAL_INV:
		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
			if (rvt_invalidate_rkey(sqp,
						wqe->wr.ex.invalidate_rkey))
				send_status = IB_WC_LOC_PROT_ERR;
			local_ops = 1;
		}
		goto send_comp;

	case IB_WR_SEND_WITH_INV:
		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
			wc.wc_flags = IB_WC_WITH_INVALIDATE;
			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
		}
		goto send;

	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
send:
		ret = hfi1_rvt_get_rwqe(qp, 0);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		ret = hfi1_rvt_get_rwqe(qp, 1);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		/* skip copy_last set and qp_access_flags recheck */
		goto do_write;
	case IB_WR_RDMA_WRITE:
		copy_last = rvt_is_user_qp(qp);
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
do_write:
		if (wqe->length == 0)
			break;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		qp->r_sge.sg_list = NULL;
		qp->r_sge.num_sge = 1;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		release = false;
		sqp->s_sge.sg_list = NULL;
		sqp->s_sge.num_sge = 1;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
					  wqe->atomic_wr.remote_addr,
					  wqe->atomic_wr.rkey,
					  IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
		sdata = wqe->atomic_wr.compare_add;
		*(u64 *)sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64)atomic64_add_return(sdata, maddr) - sdata :
			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
				     sdata, wqe->atomic_wr.swap);
		rvt_put_mr(qp->r_sge.sge.mr);
		qp->r_sge.num_sge = 0;
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (!release)
				rvt_put_mr(sge->mr);
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}
	if (release)
		rvt_put_ss(&qp->r_sge);

	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ibp->rvp.n_loop_pkts++;
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	hfi1_send_complete(sqp, wqe, send_status);
	if (local_ops) {
		atomic_dec(&sqp->local_ops_pending);
		local_ops = 0;
	}
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	ibp->rvp.n_rnr_naks++;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
		goto clr_busy;
	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
			       IB_AETH_CREDIT_SHIFT);
	goto clr_busy;

op_err:
	send_status = IB_WC_REM_OP_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	rvt_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	hfi1_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~RVT_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	rcu_read_unlock();
}
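/*
 * Note on the loopback atomic emulation above (informational sketch, not a
 * separate API): the requester expects the value that was in memory before
 * the operation.  atomic64_add_return() returns the value after the add, so
 * the code subtracts sdata again to recover the prior value, while cmpxchg()
 * already returns the prior value directly:
 *
 *	old = (u64)atomic64_add_return(sdata, maddr) - sdata;
 *	old = (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, sdata, swap);
 */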
/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: size of header after grh being sent in dwords
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords + nwords) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	hdr->sgid.global.interface_id =
		grh->sgid_index < HFI1_GUIDS_PER_PORT ?
		get_sguid(ibp, grh->sgid_index) :
		get_sguid(ibp, HFI1_PORT_GUID_INDEX);
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}
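/*
 * Sizing note (a sketch of the arithmetic above, under the units given in
 * the kernel-doc): @hwords counts the 32-bit words of header that follow
 * the GRH and @nwords counts the 32-bit words of data being sent, so the
 * GRH PayLen field is simply their sum expressed in bytes:
 *
 *	paylen_bytes = (hwords + nwords) << 2;
 *
 * e.g. 7 header dwords plus 24 data dwords gives (7 + 24) << 2 = 124 bytes.
 */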
#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \
			      hdr.ibh.u.oth.bth[2]) / 4)
/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles the AHG by allocating an ahg entry and causing the
 * copy of the first middle.
 *
 * Subsequent middles use the copied entry, editing the
 * PSN with 1 or 2 edits.
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
		clear_ahg(qp);
	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
		/* first middle that needs copy */
		if (qp->s_ahgidx < 0)
			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
		if (qp->s_ahgidx >= 0) {
			qp->s_ahgpsn = npsn;
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
			/* save to protect a change in another thread */
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			qp->s_flags |= RVT_S_AHG_VALID;
		}
	} else {
		/* subsequent middle after valid */
		if (qp->s_ahgidx >= 0) {
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			priv->s_ahg->ahgcount++;
			priv->s_ahg->ahgdesc[0] =
				sdma_build_ahg_descriptor(
					(__force u16)cpu_to_be16((u16)npsn),
					BTH2_OFFSET,
					16,
					16);
			if ((npsn & 0xffff0000) !=
					(qp->s_ahgpsn & 0xffff0000)) {
				priv->s_ahg->ahgcount++;
				priv->s_ahg->ahgdesc[1] =
					sdma_build_ahg_descriptor(
						(__force u16)cpu_to_be16(
							(u16)(npsn >> 16)),
						BTH2_OFFSET,
						0,
						16);
			}
		}
	}
}
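/*
 * AHG edit summary (informational, derived from the code above): once the
 * first middle packet latches a header copy into the hardware AHG entry,
 * each subsequent middle only needs descriptor 0, which rewrites the low
 * 16 bits of the PSN in BTH2 (BTH2_OFFSET is that dword's position in the
 * prebuilt header).  Descriptor 1 is appended only when the PSN crosses a
 * 64K boundary, i.e. when the test
 *
 *	(npsn & 0xffff0000) != (qp->s_ahgpsn & 0xffff0000)
 *
 * is true, so the common case stays a single 16-bit edit per packet.
 */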
static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
				     struct ib_other_headers *ohdr,
				     u32 bth0, u32 bth1, u32 bth2)
{
	bth1 |= qp->remote_qpn;
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(bth1);
	ohdr->bth[2] = cpu_to_be32(bth2);
}
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
					    struct ib_other_headers *ohdr,
					    u32 bth0, u32 bth2, int middle,
					    struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u32 bth1 = 0;
	u32 slid;
	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	u8 l4 = OPA_16B_L4_IB_LOCAL;
	u8 extra_bytes = hfi1_get_16b_padding(
				(ps->s_txreq->hdr_dwords << 2),
				ps->s_txreq->s_cur_size);
	u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
				 extra_bytes + SIZE_OF_LT) >> 2);
	bool becn = false;

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
		struct ib_grh *grh;
		struct ib_global_route *grd =
			rdma_ah_retrieve_grh(&qp->remote_ah_attr);
		/*
		 * Ensure OPA GIDs are transformed to IB gids
		 * before creating the GRH.
		 */
		if (grd->sgid_index == OPA_GID_INDEX)
			grd->sgid_index = 0;
		grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
		l4 = OPA_16B_L4_IB_GLOBAL;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh, grd,
				      ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
				      nwords);
		middle = 0;
	}

	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth1 |= OPA_BTH_MIG_REQ;
	else
		middle = 0;

	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~RVT_S_AHG_VALID;

	bth0 |= pkey;
	bth0 |= extra_bytes << 20;
	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		becn = true;
	}
	hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);

	if (!ppd->lid)
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	else
		slid = ppd->lid |
			(rdma_ah_get_path_bits(&qp->remote_ah_attr) &
			((1 << ppd->lmc) - 1));

	hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
			  slid,
			  opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
				      16B),
			  (ps->s_txreq->hdr_dwords + nwords) >> 1,
			  pkey, becn, 0, l4, priv->s_sc);
}
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
					   struct ib_other_headers *ohdr,
					   u32 bth0, u32 bth2, int middle,
					   struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	u32 bth1 = 0;
	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	u16 lrh0 = HFI1_LRH_BTH;
	u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
	u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
					 extra_bytes) >> 2);

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;

		lrh0 = HFI1_LRH_GRH;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh,
				      rdma_ah_read_grh(&qp->remote_ah_attr),
				      ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
				      nwords);
		middle = 0;
	}
	lrh0 |= (priv->s_sc & 0xf) << 12 |
		(rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;

	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	else
		middle = 0;

	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~RVT_S_AHG_VALID;

	bth0 |= pkey;
	bth0 |= extra_bytes << 20;
	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
	}
	hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
	hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
			 lrh0,
			 ps->s_txreq->hdr_dwords + nwords,
			 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
			 ppd_from_ibp(ibp)->lid |
				rdma_ah_get_path_bits(&qp->remote_ah_attr));
}
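/*
 * Padding note (arithmetic sketch): the 9B path pads the payload to a
 * 4-byte boundary with "-size & 3", which yields the number of bytes
 * needed to reach the next multiple of 4 (0 when already aligned):
 *
 *	-0 & 3 == 0,  -1 & 3 == 3,  -2 & 3 == 2,  -3 & 3 == 1
 *
 * The 16B path above instead asks hfi1_get_16b_padding() for its padding
 * and also accounts for the trailing LT byte via SIZE_OF_LT.
 */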
typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
				  struct ib_other_headers *ohdr,
				  u32 bth0, u32 bth2, int middle,
				  struct hfi1_pkt_state *ps);

/* We support only two types - 9B and 16B for now */
static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B
};
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/*
	 * reset s_ahg/AHG fields
	 *
	 * This insures that the ahgentry/ahgcount
	 * are at a non-AHG default to protect
	 * build_verbs_tx_desc() from using
	 * an include ahgidx.
	 *
	 * build_ahg() will modify as appropriate
	 * to use the AHG feature.
	 */
	priv->s_ahg->tx_flags = 0;
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;

	/* Make the appropriate header */
	hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps);
}
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */

/**
 * schedule_send_yield - test for a yield required for QP send engine
 * @qp: a pointer to QP
 * @ps: a pointer to a structure with commonly lookup values for
 *      the send engine progress
 *
 * This routine checks if the time slice for the QP has expired
 * for RC QPs, if so an additional work entry is queued. At this
 * point, other QPs have an opportunity to be scheduled. It
 * returns true if a yield is required, otherwise, false.
 */
static bool schedule_send_yield(struct rvt_qp *qp,
				struct hfi1_pkt_state *ps)
{
	ps->pkts_sent = true;

	if (unlikely(time_after(jiffies, ps->timeout))) {
		if (!ps->in_thread ||
		    workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
			spin_lock_irqsave(&qp->s_lock, ps->flags);
			qp->s_flags &= ~RVT_S_BUSY;
			hfi1_schedule_send(qp);
			spin_unlock_irqrestore(&qp->s_lock, ps->flags);
			this_cpu_inc(*ps->ppd->dd->send_schedule);
			trace_hfi1_rc_expired_time_slice(qp, true);
			return true;
		}

		cond_resched();
		this_cpu_inc(*ps->ppd->dd->send_schedule);
		ps->timeout = jiffies + ps->timeout_int;
	}

	trace_hfi1_rc_expired_time_slice(qp, false);
	return false;
}
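/*
 * Usage sketch (mirrors the send loop in hfi1_do_send() below): the yield
 * check runs after each packet is handed to the send path, so a long burst
 * gives up the send engine once its time slice expires:
 *
 *	if (hfi1_verbs_send(qp, &ps))
 *		return;
 *	if (schedule_send_yield(qp, &ps))
 *		return;
 */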
void hfi1_do_send_from_rvt(struct rvt_qp *qp)
{
	hfi1_do_send(qp, false);
}

void _hfi1_do_send(struct work_struct *work)
{
	struct iowait *wait = container_of(work, struct iowait, iowork);
	struct rvt_qp *qp = iowait_to_qp(wait);

	hfi1_do_send(qp, true);
}
/**
 * hfi1_do_send - perform a send on a QP
 * @work: contains a pointer to the QP
 * @in_thread: true if in a workqueue thread
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
 */
void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;
	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);
	ps.in_thread = in_thread;

	trace_hfi1_rc_do_send(qp, in_thread);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_rc_req;
		ps.timeout_int = qp->timeout_jiffies;
		break;
	case IB_QPT_UC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_uc_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
		break;
	default:
		make_req = hfi1_make_ud_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
	}

	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request. */
	if (!hfi1_send_ok(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	ps.timeout_int = ps.timeout_int / 8;
	ps.timeout = jiffies + ps.timeout_int;
	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
	ps.pkts_sent = false;

	/* insure a pre-built packet is handled */
	ps.s_txreq = get_waiting_verbs_txreq(qp);
	do {
		/* Check for a constructed packet to be sent. */
		if (ps.s_txreq) {
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;
			/* allow other tasks to run */
			if (schedule_send_yield(qp, &ps))
				return;

			spin_lock_irqsave(&qp->s_lock, ps.flags);
		}
	} while (make_req(qp, &ps));
	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
/*
 * This should be called with s_lock held.
 */
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status)
{
	u32 old_last, last;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		return;

	last = qp->s_last;
	old_last = last;
	trace_hfi1_qp_send_completion(qp, wqe, last);
	if (++last >= qp->s_size)
		last = 0;
	trace_hfi1_qp_send_completion(qp, wqe, last);
	qp->s_last = last;
	/* See post_send() */
	barrier();
	rvt_put_swqe(wqe);
	if (qp->ibqp.qp_type == IB_QPT_UD ||
	    qp->ibqp.qp_type == IB_QPT_SMI ||
	    qp->ibqp.qp_type == IB_QPT_GSI)
		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

	rvt_qp_swqe_complete(qp,
			     wqe,
			     ib_hfi1_wc_opcode[wqe->wr.opcode],
			     status);

	if (qp->s_acked == old_last)
		qp->s_acked = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}