/*
 * Copyright(c) 2015 - 2019 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"
unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *stx,
        uint seq,
        bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
                       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
                       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_REG_MR] = {
        .length = sizeof(struct ib_reg_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
        .length = sizeof(struct ib_send_wr),
        .qpt_support = BIT(IB_QPT_RC),
},

[IB_WR_OPFN] = {
        .length = sizeof(struct ib_atomic_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_USE_RESERVE,
},

[IB_WR_TID_RDMA_WRITE] = {
        .length = sizeof(struct ib_rdma_wr),
        .qpt_support = BIT(IB_QPT_RC),
        .flags = RVT_OPERATION_IGN_RNR_CNT,
},

};
static void flush_list_head(struct list_head *l)
{
        while (!list_empty(l)) {
                struct sdma_txreq *tx;

                tx = list_first_entry(
                        l,
                        struct sdma_txreq,
                        list);
                list_del_init(&tx->list);
                hfi1_put_txreq(
                        container_of(tx, struct verbs_txreq, txreq));
        }
}

static void flush_tx_list(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
        flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}

static void flush_iowait(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        unsigned long flags;
        seqlock_t *lock = priv->s_iowait.lock;

        if (!lock)
                return;
        write_seqlock_irqsave(lock, flags);
        if (!list_empty(&priv->s_iowait.list)) {
                list_del_init(&priv->s_iowait.list);
                priv->s_iowait.lock = NULL;
                rvt_put_qp(qp);
        }
        write_sequnlock_irqrestore(lock, flags);
}
static inline int opa_mtu_enum_to_int(int mtu)
{
        switch (mtu) {
        case OPA_MTU_8192:  return 8192;
        case OPA_MTU_10240: return 10240;
        default: return -1;
        }
}

/**
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen".  Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
        int val;

        /* Constraining 10KB packets to 8KB packets */
        if (mtu == (enum ib_mtu)OPA_MTU_10240)
                mtu = (enum ib_mtu)OPA_MTU_8192;
        val = opa_mtu_enum_to_int((int)mtu);
        if (val > 0)
                return val;
        return ib_mtu_enum_to_int(mtu);
}
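
/*
 * Illustrative examples (not driver code): an OPA-specific PathRecord value
 * is clamped to the 8KB limit, while a standard IBTA enum falls through to
 * the core helper:
 *
 *	verbs_mtu_enum_to_int(dev, (enum ib_mtu)OPA_MTU_10240) == 8192
 *	verbs_mtu_enum_to_int(dev, IB_MTU_4096) == 4096
 */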
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                         int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_ibdev *dev = to_idev(ibqp->device);
        struct hfi1_devdata *dd = dd_from_dev(dev);
        u8 sc;

        if (attr_mask & IB_QP_AV) {
                sc = ah_to_sc(ibqp->device, &attr->ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        if (attr_mask & IB_QP_ALT_PATH) {
                sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
                if (sc == 0xf)
                        return -EINVAL;

                if (!qp_to_sdma_engine(qp, sc) &&
                    dd->flags & HFI1_HAS_SEND_DMA)
                        return -EINVAL;

                if (!qp_to_send_context(qp, sc))
                        return -EINVAL;
        }

        return 0;
}
/*
 * qp_set_16b - Set the hdr_type based on whether the slid or the
 * dlid in the connection is extended. Only applicable for RC and UC
 * QPs. UD QPs determine this on the fly from the ah in the wqe.
 */
static inline void qp_set_16b(struct rvt_qp *qp)
{
        struct hfi1_pportdata *ppd;
        struct hfi1_ibport *ibp;
        struct hfi1_qp_priv *priv = qp->priv;

        /* Update ah_attr to account for extended LIDs */
        hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);

        /* Create 32 bit LIDs */
        hfi1_make_opa_lid(&qp->remote_ah_attr);

        if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
                return;

        ibp = to_iport(qp->ibqp.device, qp->port_num);
        ppd = ppd_from_ibp(ibp);
        priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
}
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata)
{
        struct ib_qp *ibqp = &qp->ibqp;
        struct hfi1_qp_priv *priv = qp->priv;

        if (attr_mask & IB_QP_AV) {
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
                qp_set_16b(qp);
        }

        if (attr_mask & IB_QP_PATH_MIG_STATE &&
            attr->path_mig_state == IB_MIG_MIGRATED &&
            qp->s_mig_state == IB_MIG_ARMED) {
                qp->s_flags |= HFI1_S_AHG_CLEAR;
                priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
                priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
                priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
                qp_set_16b(qp);
        }

        opfn_qp_init(qp, attr, attr_mask);
}
/**
 * hfi1_setup_wqe - set up the wqe
 * @qp - The qp
 * @wqe - The built wqe
 * @call_send - Determine if the send should be posted or scheduled.
 *
 * Perform setup of the wqe. This is called
 * prior to inserting the wqe into the ring but after
 * the wqe has been setup by RDMAVT. This function
 * allows the driver the opportunity to perform
 * validation and additional setup of the wqe.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
        struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct rvt_ah *ah;
        struct hfi1_pportdata *ppd;
        struct hfi1_devdata *dd;

        switch (qp->ibqp.qp_type) {
        case IB_QPT_RC:
                hfi1_setup_tid_rdma_wqe(qp, wqe);
                fallthrough;
        case IB_QPT_UC:
                if (wqe->length > 0x80000000U)
                        return -EINVAL;
                if (wqe->length > qp->pmtu)
                        *call_send = false;
                break;
        case IB_QPT_SMI:
                /*
                 * SM packets should exclusively use VL15 and their SL is
                 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
                 * is created, SL is 0 in most cases and as a result some
                 * fields (vl and pmtu) in ah may not be set correctly,
                 * depending on the SL2SC and SC2VL tables at the time.
                 */
                ppd = ppd_from_ibp(ibp);
                dd = dd_from_ppd(ppd);
                if (wqe->length > dd->vld[15].mtu)
                        return -EINVAL;
                break;
        case IB_QPT_GSI:
        case IB_QPT_UD:
                ah = rvt_get_swqe_ah(wqe);
                if (wqe->length > (1 << ah->log_pmtu))
                        return -EINVAL;
                if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
                        return -EINVAL;
                break;
        default:
                break;
        }

        /*
         * System latency between send and schedule is large enough that
         * forcing call_send to true for piothreshold packets is necessary.
         */
        if (wqe->length <= piothreshold)
                *call_send = true;
        return 0;
}
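
/*
 * Call-pattern sketch (assumed from the kernel-doc above, not a verbatim
 * rdmavt excerpt): the post-send path builds the swqe and then gives the
 * driver a veto before the ring insert:
 *
 *	ret = hfi1_setup_wqe(qp, wqe, &call_send);
 *	if (ret)
 *		return ret;	// wqe never reaches the ring
 *	// otherwise insert the wqe; if call_send is still true the send
 *	// is made inline instead of being scheduled
 */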
/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
                               priv->s_sde ?
                               priv->s_sde->cpu :
                               cpumask_first(cpumask_of_node(dd->node)));
}
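
/*
 * Contrast with hfi1_schedule_send() below: this variant ignores s_flags
 * and does not require qp->s_lock, so it is only safe from the post-send
 * path. The work lands on ppd->hfi1_wq, pinned to the qp's SDMA engine
 * cpu when one has been chosen, otherwise to the first cpu of the
 * device's NUMA node.
 */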
static void qp_pio_drain(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (!priv->s_sendcontext)
                return;
        while (iowait_pio_pending(&priv->s_iowait)) {
                write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
                write_sequnlock_irq(&priv->s_sendcontext->waitlock);
                iowait_pio_drain(&priv->s_iowait);
                write_seqlock_irq(&priv->s_sendcontext->waitlock);
                hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
                write_sequnlock_irq(&priv->s_sendcontext->waitlock);
        }
}
/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 * @return true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
bool hfi1_schedule_send(struct rvt_qp *qp)
{
        lockdep_assert_held(&qp->s_lock);
        if (hfi1_send_ok(qp)) {
                _hfi1_schedule_send(qp);
                return true;
        }
        if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
                iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
                                IOWAIT_PENDING_IB);
        return false;
}
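
/*
 * Typical call pattern (see iowait_sdma_drained() below for an in-file
 * example): the caller takes qp->s_lock, clears or checks the relevant
 * wait flag, and only then schedules:
 *
 *	spin_lock_irqsave(&qp->s_lock, flags);
 *	if (qp->s_flags & RVT_S_WAIT_DMA) {
 *		qp->s_flags &= ~RVT_S_WAIT_DMA;
 *		hfi1_schedule_send(qp);
 *	}
 *	spin_unlock_irqrestore(&qp->s_lock, flags);
 */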
static void hfi1_qp_schedule(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        bool ret;

        if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
                ret = hfi1_schedule_send(qp);
                if (ret)
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
        }
        if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
                ret = hfi1_schedule_tid_send(qp);
                if (ret)
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
        }
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & flag) {
                qp->s_flags &= ~flag;
                trace_hfi1_qpwakeup(qp, flag);
                hfi1_qp_schedule(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
        /* Notify hfi1_destroy_qp() if it is waiting. */
        rvt_put_qp(qp);
}
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
        struct hfi1_qp_priv *priv = qp->priv;

        if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
                qp->s_flags &= ~RVT_S_BUSY;
                /*
                 * If we are sending a first-leg packet from the second leg,
                 * we need to clear the busy flag from priv->s_flags to
                 * avoid a race condition when the qp wakes up before
                 * the call to hfi1_verbs_send() returns to the second
                 * leg. In that case, the second leg will terminate without
                 * being re-scheduled, resulting in failure to send TID RDMA
                 * WRITE DATA and TID RDMA ACK packets.
                 */
                if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
                        priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
                                           RVT_S_BUSY);
                        iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                }
        } else {
                priv->s_flags &= ~RVT_S_BUSY;
        }
}
static int iowait_sleep(
        struct sdma_engine *sde,
        struct iowait_work *wait,
        struct sdma_txreq *stx,
        uint seq,
        bool pkts_sent)
{
        struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
        struct rvt_qp *qp;
        struct hfi1_qp_priv *priv;
        unsigned long flags;
        int ret = 0;

        qp = tx->qp;
        priv = qp->priv;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                /*
                 * If we couldn't queue the DMA request, save the info
                 * and try again later rather than destroying the
                 * buffer and undoing the side effects of the copy.
                 */
                /* Make a common routine? */
                list_add_tail(&stx->list, &wait->tx_head);
                write_seqlock(&sde->waitlock);
                if (sdma_progress(sde, seq, stx))
                        goto eagain;
                if (list_empty(&priv->s_iowait.list)) {
                        struct hfi1_ibport *ibp =
                                to_iport(qp->ibqp.device, qp->port_num);

                        ibp->rvp.n_dmawait++;
                        qp->s_flags |= RVT_S_WAIT_DMA_DESC;
                        iowait_get_priority(&priv->s_iowait);
                        iowait_queue(pkts_sent, &priv->s_iowait,
                                     &sde->dmawait);
                        priv->s_iowait.lock = &sde->waitlock;
                        trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
                        rvt_get_qp(qp);
                }
                write_sequnlock(&sde->waitlock);
                hfi1_qp_unbusy(qp, wait);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                ret = -EBUSY;
        } else {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                hfi1_put_txreq(tx);
        }
        return ret;
eagain:
        write_sequnlock(&sde->waitlock);
        spin_unlock_irqrestore(&qp->s_lock, flags);
        list_del_init(&stx->list);
        return -EAGAIN;
}
static void iowait_wakeup(struct iowait *wait, int reason)
{
        struct rvt_qp *qp = iowait_to_qp(wait);

        WARN_ON(reason != SDMA_AVAIL_REASON);
        hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
        struct rvt_qp *qp = iowait_to_qp(wait);
        unsigned long flags;

        /*
         * This happens when the send engine notes
         * a QP in the error state and cannot
         * do the flush work until that QP's
         * sdma work has finished.
         */
        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & RVT_S_WAIT_DMA) {
                qp->s_flags &= ~RVT_S_WAIT_DMA;
                hfi1_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
}

static void hfi1_init_priority(struct iowait *w)
{
        struct rvt_qp *qp = iowait_to_qp(w);
        struct hfi1_qp_priv *priv = qp->priv;

        if (qp->s_flags & RVT_S_ACK_PENDING)
                w->priority++;
        if (priv->s_flags & RVT_S_ACK_PENDING)
                w->priority++;
}
/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct sdma_engine *sde;

        if (!(dd->flags & HFI1_HAS_SEND_DMA))
                return NULL;
        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                return NULL;
        default:
                break;
        }
        sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
        return sde;
}
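
/*
 * Usage in this file: hfi1_modify_qp() and hfi1_migrate_qp() cache the
 * result for the qp's current service class, e.g.
 *
 *	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 *
 * SMI QPs (and devices without send DMA) always get NULL and use PIO
 * instead.
 */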
/*
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
        struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

        switch (qp->ibqp.qp_type) {
        case IB_QPT_SMI:
                /* SMA packets to VL15 */
                return dd->vld[15].sc;
        default:
                break;
        }

        return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
                                          sc5);
}
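
/*
 * Usage mirrors the SDMA mapping above; callers pass the cached 5-bit sc,
 * e.g. from hfi1_modify_qp():
 *
 *	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
 *
 * so SMI traffic always lands on the VL15 context while other QP types are
 * spread by QP number and service class.
 */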
static const char * const qp_type_str[] = {
        "SMI", "GSI", "RC", "UC", "UD",
};

static int qp_idle(struct rvt_qp *qp)
{
        return
                qp->s_last == qp->s_acked &&
                qp->s_acked == qp->s_cur &&
                qp->s_cur == qp->s_tail &&
                qp->s_tail == qp->s_head;
}
/**
 * qp_iter_print - print the qp information to seq_file
 * @s: the seq_file to emit the qp information on
 * @iter: the iterator for the qp hash list
 */
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
        struct rvt_swqe *wqe;
        struct rvt_qp *qp = iter->qp;
        struct hfi1_qp_priv *priv = qp->priv;
        struct sdma_engine *sde;
        struct send_context *send_context;
        struct rvt_ack_entry *e = NULL;
        struct rvt_srq *srq = qp->ibqp.srq ?
                ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;

        sde = qp_to_sdma_engine(qp, priv->s_sc);
        wqe = rvt_get_swqe_ptr(qp, qp->s_last);
        send_context = qp_to_send_context(qp, priv->s_sc);
        if (qp->s_ack_queue)
                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
        seq_printf(s,
                   "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
                   iter->n,
                   qp_idle(qp) ? "I" : "B",
                   qp->ibqp.qp_num,
                   atomic_read(&qp->refcount),
                   qp_type_str[qp->ibqp.qp_type],
                   qp->state,
                   wqe ? wqe->wr.opcode : 0,
                   qp->s_flags,
                   iowait_sdma_pending(&priv->s_iowait),
                   iowait_pio_pending(&priv->s_iowait),
                   !list_empty(&priv->s_iowait.list),
                   qp->timeout,
                   wqe ? wqe->ssn : 0,
                   qp->s_lsn,
                   qp->s_last_psn,
                   qp->s_psn, qp->s_next_psn,
                   qp->s_sending_psn, qp->s_sending_hpsn,
                   qp->r_psn,
                   qp->s_last, qp->s_acked, qp->s_cur,
                   qp->s_tail, qp->s_head, qp->s_size,
                   qp->s_avail,
                   /* ack_queue ring pointers, size */
                   qp->s_tail_ack_queue, qp->r_head_ack_queue,
                   rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
                   /* remote QP info */
                   qp->remote_qpn,
                   rdma_ah_get_dlid(&qp->remote_ah_attr),
                   rdma_ah_get_sl(&qp->remote_ah_attr),
                   qp->pmtu,
                   qp->s_retry,
                   qp->s_retry_cnt,
                   qp->s_rnr_retry_cnt,
                   qp->s_rnr_retry,
                   sde,
                   sde ? sde->this_idx : 0,
                   send_context,
                   send_context ? send_context->sw_index : 0,
                   ib_cq_head(qp->ibqp.send_cq),
                   ib_cq_tail(qp->ibqp.send_cq),
                   qp->pid,
                   qp->s_state,
                   qp->s_ack_state,
                   /* ack queue information */
                   e ? e->opcode : 0,
                   e ? e->psn : 0,
                   e ? e->lpsn : 0,
                   qp->r_min_rnr_timer,
                   srq ? "SRQ" : "RQ",
                   srq ? srq->rq.size : qp->r_rq.size);
}
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv;

        priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
        if (!priv)
                return ERR_PTR(-ENOMEM);
        priv->owner = qp;

        priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
                                   rdi->dparms.node);
        if (!priv->s_ahg) {
                kfree(priv);
                return ERR_PTR(-ENOMEM);
        }
        iowait_init(
                &priv->s_iowait,
                1,
                _hfi1_do_send,
                _hfi1_do_tid_send,
                iowait_sleep,
                iowait_wakeup,
                iowait_sdma_drained,
                hfi1_init_priority);
        /* Init to a value to start the running average correctly */
        priv->s_running_pkt_size = piothreshold / 2;
        return priv;
}
void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        hfi1_qp_priv_tid_free(rdi, qp);
        kfree(priv->s_ahg);
        kfree(priv);
}
unsigned free_all_qps(struct rvt_dev_info *rdi)
{
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        int n;
        unsigned qp_inuse = 0;

        for (n = 0; n < dd->num_pports; n++) {
                struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

                rcu_read_lock();
                if (rcu_dereference(ibp->rvp.qp[0]))
                        qp_inuse++;
                if (rcu_dereference(ibp->rvp.qp[1]))
                        qp_inuse++;
                rcu_read_unlock();
        }

        return qp_inuse;
}
void flush_qp_waiters(struct rvt_qp *qp)
{
        lockdep_assert_held(&qp->s_lock);
        flush_iowait(qp);
        hfi1_tid_rdma_flush_wait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        iowait_cancel_work(&priv->s_iowait);
        if (cancel_work_sync(&priv->tid_rdma.trigger_work))
                rvt_put_qp(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;

        hfi1_del_tid_reap_timer(qp);
        hfi1_del_tid_retry_timer(qp);
        iowait_sdma_drain(&priv->s_iowait);
        qp_pio_drain(qp);
        flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
        hfi1_qp_kern_exp_rcv_clear_all(qp);
        qp->r_adefered = 0;
        clear_ahg(qp);

        /* Clear any OPFN state */
        if (qp->ibqp.qp_type == IB_QPT_RC)
                opfn_conn_error(qp);
}
/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        struct ib_event ev;

        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->remote_ah_attr = qp->alt_ah_attr;
        qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
        qp->s_pkey_index = qp->s_alt_pkey_index;
        qp->s_flags |= HFI1_S_AHG_CLEAR;
        priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
        priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
        qp_set_16b(qp);

        ev.device = qp->ibqp.device;
        ev.element.qp = &qp->ibqp;
        ev.event = IB_EVENT_PATH_MIG;
        qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}
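
/*
 * Net effect (restating the code above): the alternate path becomes the
 * primary path,
 *
 *	qp->remote_ah_attr == qp->alt_ah_attr
 *	qp->s_mig_state    == IB_MIG_MIGRATED
 *
 * the cached sc/SDMA engine are recomputed for the new path, and the ULP
 * receives a single IB_EVENT_PATH_MIG event.
 */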
int mtu_to_path_mtu(u32 mtu)
{
        return mtu_to_enum(mtu, OPA_MTU_8192);
}

u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
        u32 mtu;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        struct hfi1_ibport *ibp;
        u8 sc, vl;

        ibp = &dd->pport[qp->port_num - 1].ibport_data;
        sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
        vl = sc_to_vlt(dd, sc);

        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
        if (vl < PER_VL_SEND_CONTEXTS)
                mtu = min_t(u32, mtu, dd->vld[vl].mtu);
        return mtu;
}
int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                       struct ib_qp_attr *attr)
{
        int mtu, pidx = qp->port_num - 1;
        struct hfi1_ibdev *verbs_dev = container_of(rdi,
                                                    struct hfi1_ibdev,
                                                    rdi);
        struct hfi1_devdata *dd = container_of(verbs_dev,
                                               struct hfi1_devdata,
                                               verbs_dev);
        mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
        if (mtu == -1)
                return -1; /* values less than 0 are error */

        if (mtu > dd->pport[pidx].ibmtu)
                return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
        else
                return attr->path_mtu;
}
void notify_error_qp(struct rvt_qp *qp)
{
        struct hfi1_qp_priv *priv = qp->priv;
        seqlock_t *lock = priv->s_iowait.lock;

        if (lock) {
                write_seqlock(lock);
                if (!list_empty(&priv->s_iowait.list) &&
                    !(qp->s_flags & RVT_S_BUSY) &&
                    !(priv->s_flags & RVT_S_BUSY)) {
                        qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
                        iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
                        list_del_init(&priv->s_iowait.list);
                        priv->s_iowait.lock = NULL;
                        rvt_put_qp(qp);
                }
                write_sequnlock(lock);
        }

        if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
                if (qp->s_rdma_mr) {
                        rvt_put_mr(qp->s_rdma_mr);
                        qp->s_rdma_mr = NULL;
                }
                flush_tx_list(qp);
        }
}
/**
 * hfi1_qp_iter_cb - callback for iterator
 * @qp - the qp
 * @v - the sl in low bits of v
 *
 * This is called from the iterator callback to work
 * on an individual qp.
 */
static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
{
        int lastwqe;
        struct ib_event ev;
        struct hfi1_ibport *ibp =
                to_iport(qp->ibqp.device, qp->port_num);
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        u8 sl = (u8)v;

        if (qp->port_num != ppd->port ||
            (qp->ibqp.qp_type != IB_QPT_UC &&
             qp->ibqp.qp_type != IB_QPT_RC) ||
            rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
            !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
                return;

        spin_lock_irq(&qp->r_lock);
        spin_lock(&qp->s_hlock);
        spin_lock(&qp->s_lock);
        lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
        spin_unlock(&qp->s_lock);
        spin_unlock(&qp->s_hlock);
        spin_unlock_irq(&qp->r_lock);
        if (lastwqe) {
                ev.device = qp->ibqp.device;
                ev.element.qp = &qp->ibqp;
                ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
                qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
        }
}
/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
        struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
        struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;

        rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}
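
/*
 * Usage sketch (illustrative): after an sl->sc mapping change on a port,
 * management code can invalidate stale connections one service level at a
 * time:
 *
 *	hfi1_error_port_qps(ibp, sl);
 *
 * Each matching RC/UC qp is moved to the error state by hfi1_qp_iter_cb()
 * above, generating IB_EVENT_QP_LAST_WQE_REACHED where a last wqe exists.
 */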