2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/skbuff.h>
38 #include "rxe_queue.h"
55 COMPST_EXIT
, /* We have an issue, and we want to rerun the completer */
56 COMPST_DONE
, /* The completer finished successfully */
59 static char *comp_state_name
[] = {
60 [COMPST_GET_ACK
] = "GET ACK",
61 [COMPST_GET_WQE
] = "GET WQE",
62 [COMPST_COMP_WQE
] = "COMP WQE",
63 [COMPST_COMP_ACK
] = "COMP ACK",
64 [COMPST_CHECK_PSN
] = "CHECK PSN",
65 [COMPST_CHECK_ACK
] = "CHECK ACK",
66 [COMPST_READ
] = "READ",
67 [COMPST_ATOMIC
] = "ATOMIC",
68 [COMPST_WRITE_SEND
] = "WRITE/SEND",
69 [COMPST_UPDATE_COMP
] = "UPDATE COMP",
70 [COMPST_ERROR_RETRY
] = "ERROR RETRY",
71 [COMPST_RNR_RETRY
] = "RNR RETRY",
72 [COMPST_ERROR
] = "ERROR",
73 [COMPST_EXIT
] = "EXIT",
74 [COMPST_DONE
] = "DONE",
77 static unsigned long rnrnak_usec
[32] = {
78 [IB_RNR_TIMER_655_36
] = 655360,
79 [IB_RNR_TIMER_000_01
] = 10,
80 [IB_RNR_TIMER_000_02
] = 20,
81 [IB_RNR_TIMER_000_03
] = 30,
82 [IB_RNR_TIMER_000_04
] = 40,
83 [IB_RNR_TIMER_000_06
] = 60,
84 [IB_RNR_TIMER_000_08
] = 80,
85 [IB_RNR_TIMER_000_12
] = 120,
86 [IB_RNR_TIMER_000_16
] = 160,
87 [IB_RNR_TIMER_000_24
] = 240,
88 [IB_RNR_TIMER_000_32
] = 320,
89 [IB_RNR_TIMER_000_48
] = 480,
90 [IB_RNR_TIMER_000_64
] = 640,
91 [IB_RNR_TIMER_000_96
] = 960,
92 [IB_RNR_TIMER_001_28
] = 1280,
93 [IB_RNR_TIMER_001_92
] = 1920,
94 [IB_RNR_TIMER_002_56
] = 2560,
95 [IB_RNR_TIMER_003_84
] = 3840,
96 [IB_RNR_TIMER_005_12
] = 5120,
97 [IB_RNR_TIMER_007_68
] = 7680,
98 [IB_RNR_TIMER_010_24
] = 10240,
99 [IB_RNR_TIMER_015_36
] = 15360,
100 [IB_RNR_TIMER_020_48
] = 20480,
101 [IB_RNR_TIMER_030_72
] = 30720,
102 [IB_RNR_TIMER_040_96
] = 40960,
103 [IB_RNR_TIMER_061_44
] = 61410,
104 [IB_RNR_TIMER_081_92
] = 81920,
105 [IB_RNR_TIMER_122_88
] = 122880,
106 [IB_RNR_TIMER_163_84
] = 163840,
107 [IB_RNR_TIMER_245_76
] = 245760,
108 [IB_RNR_TIMER_327_68
] = 327680,
109 [IB_RNR_TIMER_491_52
] = 491520,
112 static inline unsigned long rnrnak_jiffies(u8 timeout
)
114 return max_t(unsigned long,
115 usecs_to_jiffies(rnrnak_usec
[timeout
]), 1);
118 static enum ib_wc_opcode
wr_to_wc_opcode(enum ib_wr_opcode opcode
)
121 case IB_WR_RDMA_WRITE
: return IB_WC_RDMA_WRITE
;
122 case IB_WR_RDMA_WRITE_WITH_IMM
: return IB_WC_RDMA_WRITE
;
123 case IB_WR_SEND
: return IB_WC_SEND
;
124 case IB_WR_SEND_WITH_IMM
: return IB_WC_SEND
;
125 case IB_WR_RDMA_READ
: return IB_WC_RDMA_READ
;
126 case IB_WR_ATOMIC_CMP_AND_SWP
: return IB_WC_COMP_SWAP
;
127 case IB_WR_ATOMIC_FETCH_AND_ADD
: return IB_WC_FETCH_ADD
;
128 case IB_WR_LSO
: return IB_WC_LSO
;
129 case IB_WR_SEND_WITH_INV
: return IB_WC_SEND
;
130 case IB_WR_RDMA_READ_WITH_INV
: return IB_WC_RDMA_READ
;
131 case IB_WR_LOCAL_INV
: return IB_WC_LOCAL_INV
;
132 case IB_WR_REG_MR
: return IB_WC_REG_MR
;
139 void retransmit_timer(unsigned long data
)
141 struct rxe_qp
*qp
= (struct rxe_qp
*)data
;
144 qp
->comp
.timeout
= 1;
145 rxe_run_task(&qp
->comp
.task
, 1);
149 void rxe_comp_queue_pkt(struct rxe_dev
*rxe
, struct rxe_qp
*qp
,
154 skb_queue_tail(&qp
->resp_pkts
, skb
);
156 must_sched
= skb_queue_len(&qp
->resp_pkts
) > 1;
157 rxe_run_task(&qp
->comp
.task
, must_sched
);
160 static inline enum comp_state
get_wqe(struct rxe_qp
*qp
,
161 struct rxe_pkt_info
*pkt
,
162 struct rxe_send_wqe
**wqe_p
)
164 struct rxe_send_wqe
*wqe
;
166 /* we come here whether or not we found a response packet to see if
167 * there are any posted WQEs
169 wqe
= queue_head(qp
->sq
.queue
);
172 /* no WQE or requester has not started it yet */
173 if (!wqe
|| wqe
->state
== wqe_state_posted
)
174 return pkt
? COMPST_DONE
: COMPST_EXIT
;
176 /* WQE does not require an ack */
177 if (wqe
->state
== wqe_state_done
)
178 return COMPST_COMP_WQE
;
180 /* WQE caused an error */
181 if (wqe
->state
== wqe_state_error
)
184 /* we have a WQE, if we also have an ack check its PSN */
185 return pkt
? COMPST_CHECK_PSN
: COMPST_EXIT
;
188 static inline void reset_retry_counters(struct rxe_qp
*qp
)
190 qp
->comp
.retry_cnt
= qp
->attr
.retry_cnt
;
191 qp
->comp
.rnr_retry
= qp
->attr
.rnr_retry
;
194 static inline enum comp_state
check_psn(struct rxe_qp
*qp
,
195 struct rxe_pkt_info
*pkt
,
196 struct rxe_send_wqe
*wqe
)
200 /* check to see if response is past the oldest WQE. if it is, complete
201 * send/write or error read/atomic
203 diff
= psn_compare(pkt
->psn
, wqe
->last_psn
);
205 if (wqe
->state
== wqe_state_pending
) {
206 if (wqe
->mask
& WR_ATOMIC_OR_READ_MASK
)
207 return COMPST_ERROR_RETRY
;
209 reset_retry_counters(qp
);
210 return COMPST_COMP_WQE
;
216 /* compare response packet to expected response */
217 diff
= psn_compare(pkt
->psn
, qp
->comp
.psn
);
219 /* response is most likely a retried packet if it matches an
220 * uncompleted WQE go complete it else ignore it
222 if (pkt
->psn
== wqe
->last_psn
)
223 return COMPST_COMP_ACK
;
226 } else if ((diff
> 0) && (wqe
->mask
& WR_ATOMIC_OR_READ_MASK
)) {
229 return COMPST_CHECK_ACK
;
233 static inline enum comp_state
check_ack(struct rxe_qp
*qp
,
234 struct rxe_pkt_info
*pkt
,
235 struct rxe_send_wqe
*wqe
)
237 unsigned int mask
= pkt
->mask
;
240 /* Check the sequence only */
241 switch (qp
->comp
.opcode
) {
243 /* Will catch all *_ONLY cases. */
244 if (!(mask
& RXE_START_MASK
))
249 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
:
250 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
:
251 if (pkt
->opcode
!= IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
&&
252 pkt
->opcode
!= IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
) {
260 /* Check operation validity. */
261 switch (pkt
->opcode
) {
262 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
:
263 case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
:
264 case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY
:
267 if ((syn
& AETH_TYPE_MASK
) != AETH_ACK
)
270 /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
271 * doesn't have an AETH)
273 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
:
274 if (wqe
->wr
.opcode
!= IB_WR_RDMA_READ
&&
275 wqe
->wr
.opcode
!= IB_WR_RDMA_READ_WITH_INV
) {
278 reset_retry_counters(qp
);
281 case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE
:
284 if ((syn
& AETH_TYPE_MASK
) != AETH_ACK
)
287 if (wqe
->wr
.opcode
!= IB_WR_ATOMIC_CMP_AND_SWP
&&
288 wqe
->wr
.opcode
!= IB_WR_ATOMIC_FETCH_AND_ADD
)
290 reset_retry_counters(qp
);
291 return COMPST_ATOMIC
;
293 case IB_OPCODE_RC_ACKNOWLEDGE
:
295 switch (syn
& AETH_TYPE_MASK
) {
297 reset_retry_counters(qp
);
298 return COMPST_WRITE_SEND
;
301 return COMPST_RNR_RETRY
;
305 case AETH_NAK_PSN_SEQ_ERROR
:
306 /* a nak implicitly acks all packets with psns
309 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) > 0) {
310 qp
->comp
.psn
= pkt
->psn
;
311 if (qp
->req
.wait_psn
) {
312 qp
->req
.wait_psn
= 0;
313 rxe_run_task(&qp
->req
.task
, 1);
316 return COMPST_ERROR_RETRY
;
318 case AETH_NAK_INVALID_REQ
:
319 wqe
->status
= IB_WC_REM_INV_REQ_ERR
;
322 case AETH_NAK_REM_ACC_ERR
:
323 wqe
->status
= IB_WC_REM_ACCESS_ERR
;
326 case AETH_NAK_REM_OP_ERR
:
327 wqe
->status
= IB_WC_REM_OP_ERR
;
331 pr_warn("unexpected nak %x\n", syn
);
332 wqe
->status
= IB_WC_REM_OP_ERR
;
342 pr_warn("unexpected opcode\n");
348 static inline enum comp_state
do_read(struct rxe_qp
*qp
,
349 struct rxe_pkt_info
*pkt
,
350 struct rxe_send_wqe
*wqe
)
352 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
355 ret
= copy_data(rxe
, qp
->pd
, IB_ACCESS_LOCAL_WRITE
,
356 &wqe
->dma
, payload_addr(pkt
),
357 payload_size(pkt
), to_mem_obj
, NULL
);
361 if (wqe
->dma
.resid
== 0 && (pkt
->mask
& RXE_END_MASK
))
362 return COMPST_COMP_ACK
;
364 return COMPST_UPDATE_COMP
;
367 static inline enum comp_state
do_atomic(struct rxe_qp
*qp
,
368 struct rxe_pkt_info
*pkt
,
369 struct rxe_send_wqe
*wqe
)
371 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
374 u64 atomic_orig
= atmack_orig(pkt
);
376 ret
= copy_data(rxe
, qp
->pd
, IB_ACCESS_LOCAL_WRITE
,
377 &wqe
->dma
, &atomic_orig
,
378 sizeof(u64
), to_mem_obj
, NULL
);
382 return COMPST_COMP_ACK
;
385 static void make_send_cqe(struct rxe_qp
*qp
, struct rxe_send_wqe
*wqe
,
388 memset(cqe
, 0, sizeof(*cqe
));
391 struct ib_wc
*wc
= &cqe
->ibwc
;
393 wc
->wr_id
= wqe
->wr
.wr_id
;
394 wc
->status
= wqe
->status
;
395 wc
->opcode
= wr_to_wc_opcode(wqe
->wr
.opcode
);
396 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
||
397 wqe
->wr
.opcode
== IB_WR_SEND_WITH_IMM
)
398 wc
->wc_flags
= IB_WC_WITH_IMM
;
399 wc
->byte_len
= wqe
->dma
.length
;
402 struct ib_uverbs_wc
*uwc
= &cqe
->uibwc
;
404 uwc
->wr_id
= wqe
->wr
.wr_id
;
405 uwc
->status
= wqe
->status
;
406 uwc
->opcode
= wr_to_wc_opcode(wqe
->wr
.opcode
);
407 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
||
408 wqe
->wr
.opcode
== IB_WR_SEND_WITH_IMM
)
409 uwc
->wc_flags
= IB_WC_WITH_IMM
;
410 uwc
->byte_len
= wqe
->dma
.length
;
411 uwc
->qp_num
= qp
->ibqp
.qp_num
;
416 * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
417 * ---------8<---------8<-------------
418 * ...Note that if a completion error occurs, a Work Completion
419 * will always be generated, even if the signaling
420 * indicator requests an Unsignaled Completion.
421 * ---------8<---------8<-------------
423 static void do_complete(struct rxe_qp
*qp
, struct rxe_send_wqe
*wqe
)
427 if ((qp
->sq_sig_type
== IB_SIGNAL_ALL_WR
) ||
428 (wqe
->wr
.send_flags
& IB_SEND_SIGNALED
) ||
429 wqe
->status
!= IB_WC_SUCCESS
) {
430 make_send_cqe(qp
, wqe
, &cqe
);
431 advance_consumer(qp
->sq
.queue
);
432 rxe_cq_post(qp
->scq
, &cqe
, 0);
434 advance_consumer(qp
->sq
.queue
);
438 * we completed something so let req run again
439 * if it is trying to fence
441 if (qp
->req
.wait_fence
) {
442 qp
->req
.wait_fence
= 0;
443 rxe_run_task(&qp
->req
.task
, 1);
447 static inline enum comp_state
complete_ack(struct rxe_qp
*qp
,
448 struct rxe_pkt_info
*pkt
,
449 struct rxe_send_wqe
*wqe
)
453 if (wqe
->has_rd_atomic
) {
454 wqe
->has_rd_atomic
= 0;
455 atomic_inc(&qp
->req
.rd_atomic
);
456 if (qp
->req
.need_rd_atomic
) {
457 qp
->comp
.timeout_retry
= 0;
458 qp
->req
.need_rd_atomic
= 0;
459 rxe_run_task(&qp
->req
.task
, 1);
463 if (unlikely(qp
->req
.state
== QP_STATE_DRAIN
)) {
464 /* state_lock used by requester & completer */
465 spin_lock_irqsave(&qp
->state_lock
, flags
);
466 if ((qp
->req
.state
== QP_STATE_DRAIN
) &&
467 (qp
->comp
.psn
== qp
->req
.psn
)) {
468 qp
->req
.state
= QP_STATE_DRAINED
;
469 spin_unlock_irqrestore(&qp
->state_lock
, flags
);
471 if (qp
->ibqp
.event_handler
) {
474 ev
.device
= qp
->ibqp
.device
;
475 ev
.element
.qp
= &qp
->ibqp
;
476 ev
.event
= IB_EVENT_SQ_DRAINED
;
477 qp
->ibqp
.event_handler(&ev
,
478 qp
->ibqp
.qp_context
);
481 spin_unlock_irqrestore(&qp
->state_lock
, flags
);
485 do_complete(qp
, wqe
);
487 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
488 return COMPST_UPDATE_COMP
;
493 static inline enum comp_state
complete_wqe(struct rxe_qp
*qp
,
494 struct rxe_pkt_info
*pkt
,
495 struct rxe_send_wqe
*wqe
)
497 qp
->comp
.opcode
= -1;
500 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
501 qp
->comp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
503 if (qp
->req
.wait_psn
) {
504 qp
->req
.wait_psn
= 0;
505 rxe_run_task(&qp
->req
.task
, 1);
509 do_complete(qp
, wqe
);
511 return COMPST_GET_WQE
;
514 static void rxe_drain_resp_pkts(struct rxe_qp
*qp
, bool notify
)
517 struct rxe_send_wqe
*wqe
;
519 while ((skb
= skb_dequeue(&qp
->resp_pkts
))) {
524 while ((wqe
= queue_head(qp
->sq
.queue
))) {
526 wqe
->status
= IB_WC_WR_FLUSH_ERR
;
527 do_complete(qp
, wqe
);
529 advance_consumer(qp
->sq
.queue
);
534 int rxe_completer(void *arg
)
536 struct rxe_qp
*qp
= (struct rxe_qp
*)arg
;
537 struct rxe_send_wqe
*wqe
= wqe
;
538 struct sk_buff
*skb
= NULL
;
539 struct rxe_pkt_info
*pkt
= NULL
;
540 enum comp_state state
;
544 if (!qp
->valid
|| qp
->req
.state
== QP_STATE_ERROR
||
545 qp
->req
.state
== QP_STATE_RESET
) {
546 rxe_drain_resp_pkts(qp
, qp
->valid
&&
547 qp
->req
.state
== QP_STATE_ERROR
);
551 if (qp
->comp
.timeout
) {
552 qp
->comp
.timeout_retry
= 1;
553 qp
->comp
.timeout
= 0;
555 qp
->comp
.timeout_retry
= 0;
558 if (qp
->req
.need_retry
)
561 state
= COMPST_GET_ACK
;
564 pr_debug("qp#%d state = %s\n", qp_num(qp
),
565 comp_state_name
[state
]);
568 skb
= skb_dequeue(&qp
->resp_pkts
);
570 pkt
= SKB_TO_PKT(skb
);
571 qp
->comp
.timeout_retry
= 0;
573 state
= COMPST_GET_WQE
;
577 state
= get_wqe(qp
, pkt
, &wqe
);
580 case COMPST_CHECK_PSN
:
581 state
= check_psn(qp
, pkt
, wqe
);
584 case COMPST_CHECK_ACK
:
585 state
= check_ack(qp
, pkt
, wqe
);
589 state
= do_read(qp
, pkt
, wqe
);
593 state
= do_atomic(qp
, pkt
, wqe
);
596 case COMPST_WRITE_SEND
:
597 if (wqe
->state
== wqe_state_pending
&&
598 wqe
->last_psn
== pkt
->psn
)
599 state
= COMPST_COMP_ACK
;
601 state
= COMPST_UPDATE_COMP
;
604 case COMPST_COMP_ACK
:
605 state
= complete_ack(qp
, pkt
, wqe
);
608 case COMPST_COMP_WQE
:
609 state
= complete_wqe(qp
, pkt
, wqe
);
612 case COMPST_UPDATE_COMP
:
613 if (pkt
->mask
& RXE_END_MASK
)
614 qp
->comp
.opcode
= -1;
616 qp
->comp
.opcode
= pkt
->opcode
;
618 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
619 qp
->comp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
621 if (qp
->req
.wait_psn
) {
622 qp
->req
.wait_psn
= 0;
623 rxe_run_task(&qp
->req
.task
, 1);
631 rxe_drop_ref(pkt
->qp
);
638 if (qp
->comp
.timeout_retry
&& wqe
) {
639 state
= COMPST_ERROR_RETRY
;
643 /* re reset the timeout counter if
645 * (2) the QP is alive
646 * (3) there is a packet sent by the requester that
647 * might be acked (we still might get spurious
648 * timeouts but try to keep them as few as possible)
649 * (4) the timeout parameter is set
651 if ((qp_type(qp
) == IB_QPT_RC
) &&
652 (qp
->req
.state
== QP_STATE_READY
) &&
653 (psn_compare(qp
->req
.psn
, qp
->comp
.psn
) > 0) &&
654 qp
->qp_timeout_jiffies
)
655 mod_timer(&qp
->retrans_timer
,
656 jiffies
+ qp
->qp_timeout_jiffies
);
660 case COMPST_ERROR_RETRY
:
661 /* we come here if the retry timer fired and we did
662 * not receive a response packet. try to retry the send
663 * queue if that makes sense and the limits have not
664 * been exceeded. remember that some timeouts are
665 * spurious since we do not reset the timer but kick
666 * it down the road or let it expire
669 /* there is nothing to retry in this case */
670 if (!wqe
|| (wqe
->state
== wqe_state_posted
)) {
675 if (qp
->comp
.retry_cnt
> 0) {
676 if (qp
->comp
.retry_cnt
!= 7)
677 qp
->comp
.retry_cnt
--;
679 /* no point in retrying if we have already
680 * seen the last ack that the requester could
683 if (psn_compare(qp
->req
.psn
,
685 /* tell the requester to retry the
686 * send queue next time around
688 qp
->req
.need_retry
= 1;
689 rxe_run_task(&qp
->req
.task
, 1);
693 rxe_drop_ref(pkt
->qp
);
702 wqe
->status
= IB_WC_RETRY_EXC_ERR
;
703 state
= COMPST_ERROR
;
707 case COMPST_RNR_RETRY
:
708 if (qp
->comp
.rnr_retry
> 0) {
709 if (qp
->comp
.rnr_retry
!= 7)
710 qp
->comp
.rnr_retry
--;
712 qp
->req
.need_retry
= 1;
713 pr_debug("qp#%d set rnr nak timer\n",
715 mod_timer(&qp
->rnr_nak_timer
,
716 jiffies
+ rnrnak_jiffies(aeth_syn(pkt
)
718 rxe_drop_ref(pkt
->qp
);
723 wqe
->status
= IB_WC_RNR_RETRY_EXC_ERR
;
724 state
= COMPST_ERROR
;
729 WARN_ON_ONCE(wqe
->status
== IB_WC_SUCCESS
);
730 do_complete(qp
, wqe
);
734 rxe_drop_ref(pkt
->qp
);
745 /* we come here if we are done with processing and want the task to
746 * exit from the loop calling us
753 /* we come here if we have processed a packet we want the task to call
754 * us again to see if there is anything else to do