/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/spinlock.h>

#include "verbs_txreq.h"
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}
/*
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
	__be64 guid;
	unsigned long flags;
	struct rvt_qp *qp = packet->qp;
	u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	u32 dlid = packet->dlid;
	u32 slid = packet->slid;
	u32 sl = packet->sl;
	bool migrated = packet->migrated;
	u16 pkey = packet->pkey;

	if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
		if (!packet->grh) {
			if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			     IB_AH_GRH) &&
			    (packet->etype != RHF_RCV_TYPE_BYPASS))
				return 1;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
			      IB_AH_GRH))
				return 1;
			grh = rdma_ah_read_grh(&qp->alt_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
				    guid))
				return 1;
			if (!gid_ok(&packet->grh->sgid,
				    grh->dgid.global.subnet_prefix,
				    grh->dgid.global.interface_id))
				return 1;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
					    sc5, slid))) {
			hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
				      slid, dlid);
			return 1;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
		    ppd_from_ibp(ibp)->port !=
		    rdma_ah_get_port_num(&qp->alt_ah_attr))
			return 1;
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!packet->grh) {
			if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			     IB_AH_GRH) &&
			    (packet->etype != RHF_RCV_TYPE_BYPASS))
				return 1;
		} else {
			const struct ib_global_route *grh;

			if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
			      IB_AH_GRH))
				return 1;
			grh = rdma_ah_read_grh(&qp->remote_ah_attr);
			guid = get_sguid(ibp, grh->sgid_index);
			if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
				    guid))
				return 1;
			if (!gid_ok(&packet->grh->sgid,
				    grh->dgid.global.subnet_prefix,
				    grh->dgid.global.interface_id))
				return 1;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
					    sc5, slid))) {
			hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
				      slid, dlid);
			return 1;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			return 1;
		if (qp->s_mig_state == IB_MIG_REARM && !migrated)
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;
}
/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: size of header after grh being sent in dwords
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords + nwords) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	hdr->sgid.global.interface_id =
		grh->sgid_index < HFI1_GUIDS_PER_PORT ?
		get_sguid(ibp, grh->sgid_index) :
		get_sguid(ibp, HFI1_PORT_GUID_INDEX);
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}
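/*
 * BTH2_OFFSET below is the dword index of bth[2] (the PSN word) within
 * struct hfi1_sdma_header; build_ahg() uses it when programming AHG
 * descriptors that patch the PSN of subsequent middle packets.
 */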
#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \
			      hdr.ibh.u.oth.bth[2]) / 4)
/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles the AHG by allocating an ahg entry and causing the
 * copy of the first middle.
 *
 * Subsequent middles use the copied entry, editing the
 * PSN with 1 or 2 edits.
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
		clear_ahg(qp);
	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
		/* first middle that needs copy */
		if (qp->s_ahgidx < 0)
			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
		if (qp->s_ahgidx >= 0) {
			qp->s_ahgpsn = npsn;
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
			/* save to protect a change in another thread */
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			qp->s_flags |= HFI1_S_AHG_VALID;
		}
	} else {
		/* subsequent middle after valid */
		if (qp->s_ahgidx >= 0) {
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			priv->s_ahg->ahgcount++;
			priv->s_ahg->ahgdesc[0] =
				sdma_build_ahg_descriptor(
					(__force u16)cpu_to_be16((u16)npsn),
					BTH2_OFFSET,
					16,
					16);
			if ((npsn & 0xffff0000) !=
					(qp->s_ahgpsn & 0xffff0000)) {
				priv->s_ahg->ahgcount++;
				priv->s_ahg->ahgdesc[1] =
					sdma_build_ahg_descriptor(
						(__force u16)cpu_to_be16(
							(u16)(npsn >> 16)),
						BTH2_OFFSET,
						0,
						16);
			}
		}
	}
}
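/*
 * BTH word layout (host order in, wire order out): bth[0] carries the
 * opcode/flags/pkey, bth[1] the destination QPN (and FECN/BECN bits),
 * and bth[2] the PSN.
 */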
static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
				     struct ib_other_headers *ohdr,
				     u32 bth0, u32 bth1, u32 bth2)
{
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(bth1);
	ohdr->bth[2] = cpu_to_be32(bth2);
}
/**
 * hfi1_make_ruc_header_16B - build a 16B header
 * @qp: the queue pair
 * @ohdr: a pointer to the destination header memory
 * @bth0: bth0 passed in from the RC/UC builder
 * @bth2: bth2 passed in from the RC/UC builder
 * @middle: non-zero indicates ahg "could" be used
 * @ps: the current packet state
 *
 * This routine may disarm ahg under these situations:
 * - packet needs a GRH
 * - BECN needed
 * - migration state not IB_MIG_MIGRATED
 */
static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
					    struct ib_other_headers *ohdr,
					    u32 bth0, u32 bth1, u32 bth2,
					    int middle,
					    struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u32 slid;
	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	u8 l4 = OPA_16B_L4_IB_LOCAL;
	u8 extra_bytes = hfi1_get_16b_padding(
				(ps->s_txreq->hdr_dwords << 2),
				ps->s_txreq->s_cur_size);
	u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
				 extra_bytes + SIZE_OF_LT) >> 2);
	bool becn = false;

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
	    hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
		struct ib_grh *grh;
		struct ib_global_route *grd =
			rdma_ah_retrieve_grh(&qp->remote_ah_attr);
		/*
		 * Ensure OPA GIDs are transformed to IB gids
		 * before creating the GRH.
		 */
		if (grd->sgid_index == OPA_GID_INDEX)
			grd->sgid_index = 0;
		grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
		l4 = OPA_16B_L4_IB_GLOBAL;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh, grd,
				      ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
				      nwords);
		middle = 0;
	}

	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth1 |= OPA_BTH_MIG_REQ;
	else
		middle = 0;

	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		becn = true;
		middle = 0;
	}
	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~HFI1_S_AHG_VALID;

	bth0 |= pkey;
	bth0 |= extra_bytes << 20;
	hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);

	if (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr)))
		slid = be32_to_cpu(OPA_LID_PERMISSIVE);
	else
		slid = ppd->lid |
			(rdma_ah_get_path_bits(&qp->remote_ah_attr) &
			 ((1 << ppd->lmc) - 1));

	hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
			  slid,
			  opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
				      16B),
			  (ps->s_txreq->hdr_dwords + nwords) >> 1,
			  pkey, becn, 0, l4, priv->s_sc);
}
/**
 * hfi1_make_ruc_header_9B - build a 9B header
 * @qp: the queue pair
 * @ohdr: a pointer to the destination header memory
 * @bth0: bth0 passed in from the RC/UC builder
 * @bth2: bth2 passed in from the RC/UC builder
 * @middle: non-zero indicates ahg "could" be used
 * @ps: the current packet state
 *
 * This routine may disarm ahg under these situations:
 * - packet needs a GRH
 * - BECN needed
 * - migration state not IB_MIG_MIGRATED
 */
static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
					   struct ib_other_headers *ohdr,
					   u32 bth0, u32 bth1, u32 bth2,
					   int middle,
					   struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
	u16 lrh0 = HFI1_LRH_BTH;
	u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
	u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
				 extra_bytes) >> 2);

	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
		struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;

		lrh0 = HFI1_LRH_GRH;
		ps->s_txreq->hdr_dwords +=
			hfi1_make_grh(ibp, grh,
				      rdma_ah_read_grh(&qp->remote_ah_attr),
				      ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
				      nwords);
		middle = 0;
	}
	lrh0 |= (priv->s_sc & 0xf) << 12 |
		(rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;

	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	else
		middle = 0;

	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
		middle = 0;
	}
	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~HFI1_S_AHG_VALID;

	bth0 |= pkey;
	bth0 |= extra_bytes << 20;
	hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
	hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
			 lrh0,
			 ps->s_txreq->hdr_dwords + nwords,
			 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
			 ppd_from_ibp(ibp)->lid |
				rdma_ah_get_path_bits(&qp->remote_ah_attr));
}
typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
				  struct ib_other_headers *ohdr,
				  u32 bth0, u32 bth1, u32 bth2, int middle,
				  struct hfi1_pkt_state *ps);

/* We support only two types - 9B and 16B for now */
static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
	[HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B,
	[HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B
};
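/*
 * hfi1_make_ruc_header() dispatches to the 9B or 16B builder above,
 * indexed by the QP's header type.
 */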
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			  u32 bth0, u32 bth1, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;

	/*
	 * reset s_ahg/AHG fields
	 *
	 * This ensures that the ahgentry/ahgcount
	 * are at a non-AHG default to protect
	 * build_verbs_tx_desc() from using
	 * a stale AHG entry.
	 *
	 * build_ahg() will modify as appropriate
	 * to use the AHG feature.
	 */
	priv->s_ahg->tx_flags = 0;
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;

	/* Make the appropriate header */
	hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
					    ps);
}
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
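/*
 * Note: hfi1_do_send() divides the selected timeout by 8 to derive the
 * per-QP time slice checked by hfi1_schedule_send_yield().
 */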
/**
 * hfi1_schedule_send_yield - test for a yield required for QP send engine
 * @timeout: final time for the time slice, in jiffies
 * @qp: a pointer to QP
 * @ps: a pointer to a structure with commonly looked-up values for
 *      the send engine progress
 * @tid: true if it is the tid leg
 *
 * This routine checks if the time slice for the QP has expired
 * for RC QPs; if so, an additional work entry is queued. At this
 * point, other QPs have an opportunity to be scheduled. It
 * returns true if a yield is required, otherwise false.
 */
bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			      bool tid)
{
	ps->pkts_sent = true;

	if (unlikely(time_after(jiffies, ps->timeout))) {
		if (!ps->in_thread ||
		    workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
			spin_lock_irqsave(&qp->s_lock, ps->flags);
			if (!tid) {
				qp->s_flags &= ~RVT_S_BUSY;
				hfi1_schedule_send(qp);
			} else {
				struct hfi1_qp_priv *priv = qp->priv;

				if (priv->s_flags &
				    HFI1_S_TID_BUSY_SET) {
					qp->s_flags &= ~RVT_S_BUSY;
					priv->s_flags &=
						~(HFI1_S_TID_BUSY_SET |
						  RVT_S_BUSY);
				} else {
					priv->s_flags &= ~RVT_S_BUSY;
				}
				hfi1_schedule_tid_send(qp);
			}

			spin_unlock_irqrestore(&qp->s_lock, ps->flags);
			this_cpu_inc(*ps->ppd->dd->send_schedule);
			trace_hfi1_rc_expired_time_slice(qp, true);
			return true;
		}

		cond_resched();
		this_cpu_inc(*ps->ppd->dd->send_schedule);
		ps->timeout = jiffies + ps->timeout_int;
	}

	trace_hfi1_rc_expired_time_slice(qp, false);
	return false;
}
void hfi1_do_send_from_rvt(struct rvt_qp *qp)
{
	hfi1_do_send(qp, false);
}
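/*
 * Workqueue entry point for the send engine: hfi1_do_send_from_rvt()
 * above runs directly from rdmavt (in_thread == false), while this
 * path runs from a workqueue thread (in_thread == true).
 */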
void _hfi1_do_send(struct work_struct *work)
{
	struct iowait_work *w = container_of(work, struct iowait_work, iowork);
	struct rvt_qp *qp = iowait_to_qp(w->iow);

	hfi1_do_send(qp, true);
}
/**
 * hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
 * @in_thread: true if in a workqueue thread
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
 */
void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;
	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);
	ps.in_thread = in_thread;
	ps.wait = iowait_get_ib_work(&priv->s_iowait);

	trace_hfi1_rc_do_send(qp, in_thread);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			rvt_ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_rc_req;
		ps.timeout_int = qp->timeout_jiffies;
		break;
	case IB_QPT_UC:
		if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			rvt_ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_uc_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
		break;
	default:
		make_req = hfi1_make_ud_req;
		ps.timeout_int = SEND_RESCHED_TIMEOUT;
	}

	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request. */
	if (!hfi1_send_ok(qp)) {
		if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	ps.timeout_int = ps.timeout_int / 8;
	ps.timeout = jiffies + ps.timeout_int;
	ps.cpu = priv->s_sde ? priv->s_sde->cpu :
			cpumask_first(cpumask_of_node(ps.ppd->dd->node));
	ps.pkts_sent = false;

	/* ensure a pre-built packet is handled */
	ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
	do {
		/* Check for a constructed packet to be sent. */
		if (ps.s_txreq) {
			if (priv->s_flags & HFI1_S_TID_BUSY_SET)
				qp->s_flags |= RVT_S_BUSY;
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;

			/* allow other tasks to run */
			if (hfi1_schedule_send_yield(qp, &ps, false))
				return;

			spin_lock_irqsave(&qp->s_lock, ps.flags);
		}
	} while (make_req(qp, &ps));
	iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}