2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/skbuff.h>
38 #include "rxe_queue.h"
55 COMPST_EXIT
, /* We have an issue, and we want to rerun the completer */
56 COMPST_DONE
, /* The completer finished successfully */
59 static char *comp_state_name
[] = {
60 [COMPST_GET_ACK
] = "GET ACK",
61 [COMPST_GET_WQE
] = "GET WQE",
62 [COMPST_COMP_WQE
] = "COMP WQE",
63 [COMPST_COMP_ACK
] = "COMP ACK",
64 [COMPST_CHECK_PSN
] = "CHECK PSN",
65 [COMPST_CHECK_ACK
] = "CHECK ACK",
66 [COMPST_READ
] = "READ",
67 [COMPST_ATOMIC
] = "ATOMIC",
68 [COMPST_WRITE_SEND
] = "WRITE/SEND",
69 [COMPST_UPDATE_COMP
] = "UPDATE COMP",
70 [COMPST_ERROR_RETRY
] = "ERROR RETRY",
71 [COMPST_RNR_RETRY
] = "RNR RETRY",
72 [COMPST_ERROR
] = "ERROR",
73 [COMPST_EXIT
] = "EXIT",
74 [COMPST_DONE
] = "DONE",
77 static unsigned long rnrnak_usec
[32] = {
78 [IB_RNR_TIMER_655_36
] = 655360,
79 [IB_RNR_TIMER_000_01
] = 10,
80 [IB_RNR_TIMER_000_02
] = 20,
81 [IB_RNR_TIMER_000_03
] = 30,
82 [IB_RNR_TIMER_000_04
] = 40,
83 [IB_RNR_TIMER_000_06
] = 60,
84 [IB_RNR_TIMER_000_08
] = 80,
85 [IB_RNR_TIMER_000_12
] = 120,
86 [IB_RNR_TIMER_000_16
] = 160,
87 [IB_RNR_TIMER_000_24
] = 240,
88 [IB_RNR_TIMER_000_32
] = 320,
89 [IB_RNR_TIMER_000_48
] = 480,
90 [IB_RNR_TIMER_000_64
] = 640,
91 [IB_RNR_TIMER_000_96
] = 960,
92 [IB_RNR_TIMER_001_28
] = 1280,
93 [IB_RNR_TIMER_001_92
] = 1920,
94 [IB_RNR_TIMER_002_56
] = 2560,
95 [IB_RNR_TIMER_003_84
] = 3840,
96 [IB_RNR_TIMER_005_12
] = 5120,
97 [IB_RNR_TIMER_007_68
] = 7680,
98 [IB_RNR_TIMER_010_24
] = 10240,
99 [IB_RNR_TIMER_015_36
] = 15360,
100 [IB_RNR_TIMER_020_48
] = 20480,
101 [IB_RNR_TIMER_030_72
] = 30720,
102 [IB_RNR_TIMER_040_96
] = 40960,
103 [IB_RNR_TIMER_061_44
] = 61410,
104 [IB_RNR_TIMER_081_92
] = 81920,
105 [IB_RNR_TIMER_122_88
] = 122880,
106 [IB_RNR_TIMER_163_84
] = 163840,
107 [IB_RNR_TIMER_245_76
] = 245760,
108 [IB_RNR_TIMER_327_68
] = 327680,
109 [IB_RNR_TIMER_491_52
] = 491520,
112 static inline unsigned long rnrnak_jiffies(u8 timeout
)
114 return max_t(unsigned long,
115 usecs_to_jiffies(rnrnak_usec
[timeout
]), 1);
118 static enum ib_wc_opcode
wr_to_wc_opcode(enum ib_wr_opcode opcode
)
121 case IB_WR_RDMA_WRITE
: return IB_WC_RDMA_WRITE
;
122 case IB_WR_RDMA_WRITE_WITH_IMM
: return IB_WC_RDMA_WRITE
;
123 case IB_WR_SEND
: return IB_WC_SEND
;
124 case IB_WR_SEND_WITH_IMM
: return IB_WC_SEND
;
125 case IB_WR_RDMA_READ
: return IB_WC_RDMA_READ
;
126 case IB_WR_ATOMIC_CMP_AND_SWP
: return IB_WC_COMP_SWAP
;
127 case IB_WR_ATOMIC_FETCH_AND_ADD
: return IB_WC_FETCH_ADD
;
128 case IB_WR_LSO
: return IB_WC_LSO
;
129 case IB_WR_SEND_WITH_INV
: return IB_WC_SEND
;
130 case IB_WR_RDMA_READ_WITH_INV
: return IB_WC_RDMA_READ
;
131 case IB_WR_LOCAL_INV
: return IB_WC_LOCAL_INV
;
132 case IB_WR_REG_MR
: return IB_WC_REG_MR
;
139 void retransmit_timer(unsigned long data
)
141 struct rxe_qp
*qp
= (struct rxe_qp
*)data
;
144 qp
->comp
.timeout
= 1;
145 rxe_run_task(&qp
->comp
.task
, 1);
149 void rxe_comp_queue_pkt(struct rxe_dev
*rxe
, struct rxe_qp
*qp
,
154 skb_queue_tail(&qp
->resp_pkts
, skb
);
156 must_sched
= skb_queue_len(&qp
->resp_pkts
) > 1;
157 rxe_run_task(&qp
->comp
.task
, must_sched
);
160 static inline enum comp_state
get_wqe(struct rxe_qp
*qp
,
161 struct rxe_pkt_info
*pkt
,
162 struct rxe_send_wqe
**wqe_p
)
164 struct rxe_send_wqe
*wqe
;
166 /* we come here whether or not we found a response packet to see if
167 * there are any posted WQEs
169 wqe
= queue_head(qp
->sq
.queue
);
172 /* no WQE or requester has not started it yet */
173 if (!wqe
|| wqe
->state
== wqe_state_posted
)
174 return pkt
? COMPST_DONE
: COMPST_EXIT
;
176 /* WQE does not require an ack */
177 if (wqe
->state
== wqe_state_done
)
178 return COMPST_COMP_WQE
;
180 /* WQE caused an error */
181 if (wqe
->state
== wqe_state_error
)
184 /* we have a WQE, if we also have an ack check its PSN */
185 return pkt
? COMPST_CHECK_PSN
: COMPST_EXIT
;
188 static inline void reset_retry_counters(struct rxe_qp
*qp
)
190 qp
->comp
.retry_cnt
= qp
->attr
.retry_cnt
;
191 qp
->comp
.rnr_retry
= qp
->attr
.rnr_retry
;
194 static inline enum comp_state
check_psn(struct rxe_qp
*qp
,
195 struct rxe_pkt_info
*pkt
,
196 struct rxe_send_wqe
*wqe
)
200 /* check to see if response is past the oldest WQE. if it is, complete
201 * send/write or error read/atomic
203 diff
= psn_compare(pkt
->psn
, wqe
->last_psn
);
205 if (wqe
->state
== wqe_state_pending
) {
206 if (wqe
->mask
& WR_ATOMIC_OR_READ_MASK
)
207 return COMPST_ERROR_RETRY
;
209 reset_retry_counters(qp
);
210 return COMPST_COMP_WQE
;
216 /* compare response packet to expected response */
217 diff
= psn_compare(pkt
->psn
, qp
->comp
.psn
);
219 /* response is most likely a retried packet if it matches an
220 * uncompleted WQE go complete it else ignore it
222 if (pkt
->psn
== wqe
->last_psn
)
223 return COMPST_COMP_ACK
;
226 } else if ((diff
> 0) && (wqe
->mask
& WR_ATOMIC_OR_READ_MASK
)) {
227 return COMPST_ERROR_RETRY
;
229 return COMPST_CHECK_ACK
;
233 static inline enum comp_state
check_ack(struct rxe_qp
*qp
,
234 struct rxe_pkt_info
*pkt
,
235 struct rxe_send_wqe
*wqe
)
237 unsigned int mask
= pkt
->mask
;
240 /* Check the sequence only */
241 switch (qp
->comp
.opcode
) {
243 /* Will catch all *_ONLY cases. */
244 if (!(mask
& RXE_START_MASK
))
249 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
:
250 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
:
251 if (pkt
->opcode
!= IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
&&
252 pkt
->opcode
!= IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
) {
260 /* Check operation validity. */
261 switch (pkt
->opcode
) {
262 case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
:
263 case IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
:
264 case IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY
:
267 if ((syn
& AETH_TYPE_MASK
) != AETH_ACK
)
270 /* Fall through (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
271 * doesn't have an AETH)
273 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
:
274 if (wqe
->wr
.opcode
!= IB_WR_RDMA_READ
&&
275 wqe
->wr
.opcode
!= IB_WR_RDMA_READ_WITH_INV
) {
278 reset_retry_counters(qp
);
281 case IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE
:
284 if ((syn
& AETH_TYPE_MASK
) != AETH_ACK
)
287 if (wqe
->wr
.opcode
!= IB_WR_ATOMIC_CMP_AND_SWP
&&
288 wqe
->wr
.opcode
!= IB_WR_ATOMIC_FETCH_AND_ADD
)
290 reset_retry_counters(qp
);
291 return COMPST_ATOMIC
;
293 case IB_OPCODE_RC_ACKNOWLEDGE
:
295 switch (syn
& AETH_TYPE_MASK
) {
297 reset_retry_counters(qp
);
298 return COMPST_WRITE_SEND
;
301 return COMPST_RNR_RETRY
;
305 case AETH_NAK_PSN_SEQ_ERROR
:
306 /* a nak implicitly acks all packets with psns
309 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) > 0) {
310 qp
->comp
.psn
= pkt
->psn
;
311 if (qp
->req
.wait_psn
) {
312 qp
->req
.wait_psn
= 0;
313 rxe_run_task(&qp
->req
.task
, 1);
316 return COMPST_ERROR_RETRY
;
318 case AETH_NAK_INVALID_REQ
:
319 wqe
->status
= IB_WC_REM_INV_REQ_ERR
;
322 case AETH_NAK_REM_ACC_ERR
:
323 wqe
->status
= IB_WC_REM_ACCESS_ERR
;
326 case AETH_NAK_REM_OP_ERR
:
327 wqe
->status
= IB_WC_REM_OP_ERR
;
331 pr_warn("unexpected nak %x\n", syn
);
332 wqe
->status
= IB_WC_REM_OP_ERR
;
342 pr_warn("unexpected opcode\n");
348 static inline enum comp_state
do_read(struct rxe_qp
*qp
,
349 struct rxe_pkt_info
*pkt
,
350 struct rxe_send_wqe
*wqe
)
352 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
355 ret
= copy_data(rxe
, qp
->pd
, IB_ACCESS_LOCAL_WRITE
,
356 &wqe
->dma
, payload_addr(pkt
),
357 payload_size(pkt
), to_mem_obj
, NULL
);
361 if (wqe
->dma
.resid
== 0 && (pkt
->mask
& RXE_END_MASK
))
362 return COMPST_COMP_ACK
;
364 return COMPST_UPDATE_COMP
;
367 static inline enum comp_state
do_atomic(struct rxe_qp
*qp
,
368 struct rxe_pkt_info
*pkt
,
369 struct rxe_send_wqe
*wqe
)
371 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
374 u64 atomic_orig
= atmack_orig(pkt
);
376 ret
= copy_data(rxe
, qp
->pd
, IB_ACCESS_LOCAL_WRITE
,
377 &wqe
->dma
, &atomic_orig
,
378 sizeof(u64
), to_mem_obj
, NULL
);
382 return COMPST_COMP_ACK
;
385 static void make_send_cqe(struct rxe_qp
*qp
, struct rxe_send_wqe
*wqe
,
388 memset(cqe
, 0, sizeof(*cqe
));
391 struct ib_wc
*wc
= &cqe
->ibwc
;
393 wc
->wr_id
= wqe
->wr
.wr_id
;
394 wc
->status
= wqe
->status
;
395 wc
->opcode
= wr_to_wc_opcode(wqe
->wr
.opcode
);
396 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
||
397 wqe
->wr
.opcode
== IB_WR_SEND_WITH_IMM
)
398 wc
->wc_flags
= IB_WC_WITH_IMM
;
399 wc
->byte_len
= wqe
->dma
.length
;
402 struct ib_uverbs_wc
*uwc
= &cqe
->uibwc
;
404 uwc
->wr_id
= wqe
->wr
.wr_id
;
405 uwc
->status
= wqe
->status
;
406 uwc
->opcode
= wr_to_wc_opcode(wqe
->wr
.opcode
);
407 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
||
408 wqe
->wr
.opcode
== IB_WR_SEND_WITH_IMM
)
409 uwc
->wc_flags
= IB_WC_WITH_IMM
;
410 uwc
->byte_len
= wqe
->dma
.length
;
411 uwc
->qp_num
= qp
->ibqp
.qp_num
;
415 static void do_complete(struct rxe_qp
*qp
, struct rxe_send_wqe
*wqe
)
419 if ((qp
->sq_sig_type
== IB_SIGNAL_ALL_WR
) ||
420 (wqe
->wr
.send_flags
& IB_SEND_SIGNALED
) ||
421 (qp
->req
.state
== QP_STATE_ERROR
)) {
422 make_send_cqe(qp
, wqe
, &cqe
);
423 rxe_cq_post(qp
->scq
, &cqe
, 0);
426 advance_consumer(qp
->sq
.queue
);
429 * we completed something so let req run again
430 * if it is trying to fence
432 if (qp
->req
.wait_fence
) {
433 qp
->req
.wait_fence
= 0;
434 rxe_run_task(&qp
->req
.task
, 1);
438 static inline enum comp_state
complete_ack(struct rxe_qp
*qp
,
439 struct rxe_pkt_info
*pkt
,
440 struct rxe_send_wqe
*wqe
)
444 if (wqe
->has_rd_atomic
) {
445 wqe
->has_rd_atomic
= 0;
446 atomic_inc(&qp
->req
.rd_atomic
);
447 if (qp
->req
.need_rd_atomic
) {
448 qp
->comp
.timeout_retry
= 0;
449 qp
->req
.need_rd_atomic
= 0;
450 rxe_run_task(&qp
->req
.task
, 1);
454 if (unlikely(qp
->req
.state
== QP_STATE_DRAIN
)) {
455 /* state_lock used by requester & completer */
456 spin_lock_irqsave(&qp
->state_lock
, flags
);
457 if ((qp
->req
.state
== QP_STATE_DRAIN
) &&
458 (qp
->comp
.psn
== qp
->req
.psn
)) {
459 qp
->req
.state
= QP_STATE_DRAINED
;
460 spin_unlock_irqrestore(&qp
->state_lock
, flags
);
462 if (qp
->ibqp
.event_handler
) {
465 ev
.device
= qp
->ibqp
.device
;
466 ev
.element
.qp
= &qp
->ibqp
;
467 ev
.event
= IB_EVENT_SQ_DRAINED
;
468 qp
->ibqp
.event_handler(&ev
,
469 qp
->ibqp
.qp_context
);
472 spin_unlock_irqrestore(&qp
->state_lock
, flags
);
476 do_complete(qp
, wqe
);
478 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
479 return COMPST_UPDATE_COMP
;
484 static inline enum comp_state
complete_wqe(struct rxe_qp
*qp
,
485 struct rxe_pkt_info
*pkt
,
486 struct rxe_send_wqe
*wqe
)
488 qp
->comp
.opcode
= -1;
491 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
492 qp
->comp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
494 if (qp
->req
.wait_psn
) {
495 qp
->req
.wait_psn
= 0;
496 rxe_run_task(&qp
->req
.task
, 1);
500 do_complete(qp
, wqe
);
502 return COMPST_GET_WQE
;
505 int rxe_completer(void *arg
)
507 struct rxe_qp
*qp
= (struct rxe_qp
*)arg
;
508 struct rxe_send_wqe
*wqe
= wqe
;
509 struct sk_buff
*skb
= NULL
;
510 struct rxe_pkt_info
*pkt
= NULL
;
511 enum comp_state state
;
514 while ((skb
= skb_dequeue(&qp
->resp_pkts
))) {
521 while (queue_head(qp
->sq
.queue
))
522 advance_consumer(qp
->sq
.queue
);
527 if (qp
->req
.state
== QP_STATE_ERROR
) {
528 while ((skb
= skb_dequeue(&qp
->resp_pkts
))) {
535 while ((wqe
= queue_head(qp
->sq
.queue
))) {
536 wqe
->status
= IB_WC_WR_FLUSH_ERR
;
537 do_complete(qp
, wqe
);
543 if (qp
->req
.state
== QP_STATE_RESET
) {
544 while ((skb
= skb_dequeue(&qp
->resp_pkts
))) {
551 while (queue_head(qp
->sq
.queue
))
552 advance_consumer(qp
->sq
.queue
);
557 if (qp
->comp
.timeout
) {
558 qp
->comp
.timeout_retry
= 1;
559 qp
->comp
.timeout
= 0;
561 qp
->comp
.timeout_retry
= 0;
564 if (qp
->req
.need_retry
)
567 state
= COMPST_GET_ACK
;
570 pr_debug("state = %s\n", comp_state_name
[state
]);
573 skb
= skb_dequeue(&qp
->resp_pkts
);
575 pkt
= SKB_TO_PKT(skb
);
576 qp
->comp
.timeout_retry
= 0;
578 state
= COMPST_GET_WQE
;
582 state
= get_wqe(qp
, pkt
, &wqe
);
585 case COMPST_CHECK_PSN
:
586 state
= check_psn(qp
, pkt
, wqe
);
589 case COMPST_CHECK_ACK
:
590 state
= check_ack(qp
, pkt
, wqe
);
594 state
= do_read(qp
, pkt
, wqe
);
598 state
= do_atomic(qp
, pkt
, wqe
);
601 case COMPST_WRITE_SEND
:
602 if (wqe
->state
== wqe_state_pending
&&
603 wqe
->last_psn
== pkt
->psn
)
604 state
= COMPST_COMP_ACK
;
606 state
= COMPST_UPDATE_COMP
;
609 case COMPST_COMP_ACK
:
610 state
= complete_ack(qp
, pkt
, wqe
);
613 case COMPST_COMP_WQE
:
614 state
= complete_wqe(qp
, pkt
, wqe
);
617 case COMPST_UPDATE_COMP
:
618 if (pkt
->mask
& RXE_END_MASK
)
619 qp
->comp
.opcode
= -1;
621 qp
->comp
.opcode
= pkt
->opcode
;
623 if (psn_compare(pkt
->psn
, qp
->comp
.psn
) >= 0)
624 qp
->comp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
626 if (qp
->req
.wait_psn
) {
627 qp
->req
.wait_psn
= 0;
628 rxe_run_task(&qp
->req
.task
, 1);
636 rxe_drop_ref(pkt
->qp
);
642 if (qp
->comp
.timeout_retry
&& wqe
) {
643 state
= COMPST_ERROR_RETRY
;
647 /* reset the timeout counter if
649 * (2) the QP is alive
650 * (3) there is a packet sent by the requester that
651 * might be acked (we still might get spurious
652 * timeouts but try to keep them as few as possible)
653 * (4) the timeout parameter is set
655 if ((qp_type(qp
) == IB_QPT_RC
) &&
656 (qp
->req
.state
== QP_STATE_READY
) &&
657 (psn_compare(qp
->req
.psn
, qp
->comp
.psn
) > 0) &&
658 qp
->qp_timeout_jiffies
)
659 mod_timer(&qp
->retrans_timer
,
660 jiffies
+ qp
->qp_timeout_jiffies
);
663 case COMPST_ERROR_RETRY
:
664 /* we come here if the retry timer fired and we did
665 * not receive a response packet. try to retry the send
666 * queue if that makes sense and the limits have not
667 * been exceeded. remember that some timeouts are
668 * spurious since we do not reset the timer but kick
669 * it down the road or let it expire
672 /* there is nothing to retry in this case */
673 if (!wqe
|| (wqe
->state
== wqe_state_posted
))
676 if (qp
->comp
.retry_cnt
> 0) {
677 if (qp
->comp
.retry_cnt
!= 7)
678 qp
->comp
.retry_cnt
--;
680 /* no point in retrying if we have already
681 * seen the last ack that the requester could
684 if (psn_compare(qp
->req
.psn
,
686 /* tell the requester to retry the
687 * send queue next time around
689 qp
->req
.need_retry
= 1;
690 rxe_run_task(&qp
->req
.task
, 1);
694 wqe
->status
= IB_WC_RETRY_EXC_ERR
;
695 state
= COMPST_ERROR
;
699 case COMPST_RNR_RETRY
:
700 if (qp
->comp
.rnr_retry
> 0) {
701 if (qp
->comp
.rnr_retry
!= 7)
702 qp
->comp
.rnr_retry
--;
704 qp
->req
.need_retry
= 1;
705 pr_debug("set rnr nak timer\n");
706 mod_timer(&qp
->rnr_nak_timer
,
707 jiffies
+ rnrnak_jiffies(aeth_syn(pkt
)
711 wqe
->status
= IB_WC_RNR_RETRY_EXC_ERR
;
712 state
= COMPST_ERROR
;
717 do_complete(qp
, wqe
);
724 /* we come here if we are done with processing and want the task to
725 * exit from the loop calling us
730 /* we come here if we have processed a packet we want the task to call
731 * us again to see if there is anything else to do