/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
static char *resp_state_name[] = {
	[RESPST_NONE]				= "NONE",
	[RESPST_GET_REQ]			= "GET_REQ",
	[RESPST_CHK_PSN]			= "CHK_PSN",
	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
	[RESPST_CHK_RKEY]			= "CHK_RKEY",
	[RESPST_EXECUTE]			= "EXECUTE",
	[RESPST_READ_REPLY]			= "READ_REPLY",
	[RESPST_COMPLETE]			= "COMPLETE",
	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
	[RESPST_CLEANUP]			= "CLEANUP",
	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR]			= "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
	[RESPST_ERROR]				= "ERROR",
	[RESPST_RESET]				= "RESET",
	[RESPST_DONE]				= "DONE",
	[RESPST_EXIT]				= "EXIT",
};
/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
			struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	rxe_run_task(&qp->resp.task, must_sched);
}
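/*
 * Scheduling note (informal sketch of the behaviour above): rxe_run_task()
 * runs the responder inline when its second argument is zero and defers to
 * the tasklet otherwise, so a read request, or a backlog of more than one
 * queued packet, is pushed to the tasklet rather than being processed in
 * the receive path.
 */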
static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	if (qp->resp.state == QP_STATE_ERROR) {
		skb = skb_dequeue(&qp->req_pkts);
		if (skb) {
			/* drain request packet queue */
			rxe_drop_ref(qp);
			kfree_skb(skb);
			return RESPST_GET_REQ;
		}

		/* go drain recv wr queue */
		return RESPST_CHK_RESOURCE;
	}

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}
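/*
 * psn_compare() treats the 24-bit PSN space as circular; conceptually it
 * returns the sign of ((psn_a - psn_b) << 8) as a signed 32-bit value, so
 * psn_compare(1, 0xffffff) is positive: PSN 1 follows 0xffffff across the
 * wrap.  Above, diff > 0 means the packet is ahead of the expected PSN (a
 * gap, NAKed once as a sequence error) and diff < 0 means an earlier,
 * duplicate PSN that is handed to duplicate_request().
 */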
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
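/*
 * The nested switches above enforce the IBA opcode sequence rules: MIDDLE
 * and LAST packets are only legal while a message opened by the matching
 * FIRST (or MIDDLE) opcode of the same operation type is in progress, and a
 * MIDDLE/LAST arriving with no message in progress is a missing-first
 * error.  qp->resp.opcode holds the opcode of the previously executed
 * packet.
 */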
static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (((pkt->mask & RXE_READ_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
		    ((pkt->mask & RXE_WRITE_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
		    ((pkt->mask & RXE_ATOMIC_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
			return RESPST_ERR_UNSUPPORTED_OPCODE;
		}

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}
static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_bh(&srq->rq.consumer_lock);

	wqe = queue_head(q);
	if (!wqe) {
		spin_unlock_bh(&srq->rq.consumer_lock);
		return RESPST_ERR_RNR;
	}

	/* note kernel and user space recv wqes have same size */
	memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	advance_consumer(q);

	if (srq->limit && srq->ibsrq.event_handler &&
	    (queue_count(q) < srq->limit)) {
		srq->limit = 0;
		goto limit_reached;
	}

	spin_unlock_bh(&srq->rq.consumer_lock);
	return RESPST_CHK_LENGTH;

limit_reached:
	spin_unlock_bh(&srq->rq.consumer_lock);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}
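/*
 * SRQ limit handling: when an armed limit is set and this dequeue drops the
 * number of posted receive WQEs below it, the limit is disarmed
 * (srq->limit = 0) and a single IB_EVENT_SRQ_LIMIT_REACHED event is
 * delivered outside the consumer lock so the consumer can post more
 * buffers.
 */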
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (qp->resp.state == QP_STATE_ERROR) {
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_WR_FLUSH_ERR;
			return RESPST_COMPLETE;
		} else if (!srq) {
			qp->resp.wqe = queue_head(qp->rq.queue);
			if (qp->resp.wqe) {
				qp->resp.status = IB_WC_WR_FLUSH_ERR;
				return RESPST_COMPLETE;
			} else {
				return RESPST_EXIT;
			}
		} else {
			return RESPST_EXIT;
		}
	}

	if (pkt->mask & RXE_READ_OR_ATOMIC) {
		/* it is the requesters job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}
static enum resp_states check_length(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return RESPST_CHK_RKEY;

	case IB_QPT_UC:
		return RESPST_CHK_RKEY;

	default:
		return RESPST_CHK_RKEY;
	}
}
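/*
 * check_length() is effectively a placeholder in this state machine: the
 * per-packet payload and pad validation for writes is done in check_rkey(),
 * and read lengths are handled when the reply is segmented in read_reply().
 */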
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mem *mem = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access;

	if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK) {
			qp->resp.va = reth_va(pkt);
			qp->resp.rkey = reth_rkey(pkt);
			qp->resp.resid = reth_len(pkt);
			qp->resp.length = reth_len(pkt);
		}
		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp->resp.va = atmeth_va(pkt);
		qp->resp.rkey = atmeth_rkey(pkt);
		qp->resp.resid = sizeof(u64);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		return RESPST_EXECUTE;
	}

	/* A zero-byte op is not required to set an addr or rkey. */
	if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
	    (pkt->mask & RXE_RETH_MASK) &&
	    reth_len(pkt) == 0) {
		return RESPST_EXECUTE;
	}

	va	= qp->resp.va;
	rkey	= qp->resp.rkey;
	resid	= qp->resp.resid;
	pktlen	= payload_size(pkt);

	mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
	if (!mem) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (mem_check_range(mem, va, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (pkt->mask & RXE_WRITE_MASK) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mem;
	return RESPST_EXECUTE;

err:
	if (mem)
		rxe_drop_ref(mem);
	return state;
}
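/*
 * Pad arithmetic example: a last/only write packet that leaves resid = 5
 * payload bytes must round the payload up to a 4-byte boundary, so
 * 0x3 & (-resid) = 3 pad bytes are expected in the BTH pad count; any other
 * combination of payload length and pad is reported as a length error.
 */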
static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, to_mem_obj, NULL);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}
static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
			   data_len, to_mem_obj, NULL);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);

static enum resp_states process_atomic(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt)
{
	u64 iova = atmeth_va(pkt);
	u64 *vaddr;
	enum resp_states ret;
	struct rxe_mem *mr = qp->resp.mr;

	if (mr->state != RXE_MEM_STATE_VALID) {
		ret = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	vaddr = iova_to_vaddr(mr, iova, sizeof(u64));

	/* check vaddr is 8 bytes aligned. */
	if (!vaddr || (uintptr_t)vaddr & 7) {
		ret = RESPST_ERR_MISALIGNED_ATOMIC;
		goto out;
	}

	spin_lock_bh(&atomic_ops_lock);

	qp->resp.atomic_orig = *vaddr;

	if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
	    pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
		if (*vaddr == atmeth_comp(pkt))
			*vaddr = atmeth_swap_add(pkt);
	} else {
		*vaddr += atmeth_swap_add(pkt);
	}

	spin_unlock_bh(&atomic_ops_lock);

	ret = RESPST_NONE;
out:
	return ret;
}
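/*
 * Atomic semantics implemented above, serialized by atomic_ops_lock:
 * COMPARE_SWAP stores the swap value only if the current value equals the
 * compare value, FETCH_ADD unconditionally adds.  In both cases the
 * pre-operation value is saved in qp->resp.atomic_orig so it can be
 * returned to the requester in the atomic ack.
 */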
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome,
					  u32 *crcp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	u32 crc = 0;
	u32 *p;
	int paylen;
	int pad;
	int err;

	/* allocate packet */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->offset = pkt->offset;
	ack->paylen = paylen;

	/* fill in bth using the request packet headers */
	memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);

	bth_set_opcode(ack, opcode);
	bth_set_qpn(ack, qp->attr.dest_qp_num);
	bth_set_pad(ack, pad);
	bth_set_se(ack, 0);
	bth_set_psn(ack, psn);
	bth_set_ack(ack, 0);
	ack->psn = psn;

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.atomic_orig);

	err = rxe_prepare(rxe, ack, skb, &crc);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	if (crcp) {
		/* CRC computation will be continued by the caller */
		*crcp = crc;
	} else {
		p = payload_addr(ack) + payload + bth_pad(ack);
		*p = ~crc;
	}

	return skb;
}
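/*
 * Packet sizing: pad = (-payload) & 0x3 rounds the payload up to a 4-byte
 * boundary (payload 5 -> pad 3), and paylen adds the header length for the
 * chosen opcode plus the 4-byte ICRC.  The request's headers up to and
 * including the BTH are reused as a template, then opcode, destination QPN,
 * pad count and PSN are overwritten for the ack.
 */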
/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	u32 icrc;
	u32 *p;

	if (!res) {
		/* This is the first time we process that request. Get a
		 * resource
		 */
		res = &qp->resp.resources[qp->resp.res_head];

		free_rd_atomic_resource(qp, res);
		rxe_advance_resp_resource(qp);

		res->type		= RXE_READ_MASK;
		res->replay		= 0;

		res->read.va		= qp->resp.va;
		res->read.va_org	= qp->resp.va;

		res->first_psn		= req_pkt->psn;

		if (reth_len(req_pkt)) {
			res->last_psn	= (req_pkt->psn +
					   (reth_len(req_pkt) + mtu - 1) /
					   mtu - 1) & BTH_PSN_MASK;
		} else {
			res->last_psn	= res->first_psn;
		}
		res->cur_psn		= req_pkt->psn;

		res->read.resid		= qp->resp.resid;
		res->read.length	= qp->resp.resid;
		res->read.rkey		= qp->resp.rkey;

		/* note res inherits the reference to mr from qp */
		res->read.mr		= qp->resp.mr;
		qp->resp.mr		= NULL;

		qp->resp.res		= res;
		res->state		= rdatm_res_state_new;
	}

	if (res->state == rdatm_res_state_new) {
		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
	if (!skb)
		return RESPST_ERR_RNR;

	err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
			   payload, from_mem_obj, &icrc);
	if (err)
		pr_err("Failed copying memory\n");

	if (bth_pad(&ack_pkt)) {
		struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
		icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
	}
	p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
	*p = ~icrc;

	err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
	if (err) {
		pr_err("Failed sending RDMA reply.\n");
		return RESPST_ERR_RNR;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

	return state;
}
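/*
 * Segmentation example: for reth_len = 8192 and mtu = 1024 the reply spans
 * 8 packets, so last_psn = first_psn + (8192 + 1023)/1024 - 1 =
 * first_psn + 7.  Each pass through read_reply() emits one
 * FIRST/MIDDLE/LAST (or ONLY) response packet, advances res->read.va and
 * res->cur_psn, and returns RESPST_DONE until resid reaches zero.
 */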
static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
				   struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb = PKT_TO_SKB(pkt);

	memset(hdr, 0, sizeof(*hdr));
	if (skb->protocol == htons(ETH_P_IP))
		memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh));
	else if (skb->protocol == htons(ETH_P_IPV6))
		memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
}
/* Executes a new request. A retried request never reach that function (send
 * and writes are discarded, and reads and atomics are retried elsewhere.
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_SMI ||
		    qp_type(qp) == IB_QPT_GSI) {
			union rdma_network_hdr hdr;

			build_rdma_network_hdr(&hdr, pkt);

			err = send_data_in(qp, &hdr, sizeof(hdr));
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		err = process_atomic(qp, pkt);
		if (err)
			return err;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK) {
		/* We successfully processed this new request. */
		qp->resp.msn++;
		return RESPST_COMPLETE;
	} else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
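/*
 * After a successful execute the expected PSN advances past this packet
 * (reads advance it in read_reply() instead).  Only packets that complete a
 * message (RXE_COMP_MASK) consume the receive WQE and go through
 * RESPST_COMPLETE; intermediate RC packets still pass through
 * RESPST_ACKNOWLEDGE in case an explicit ack is owed.
 */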
static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;

	if (unlikely(!wqe))
		return RESPST_CLEANUP;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status		= qp->resp.status;
		uwc->qp_num		= qp->ibqp.qp_num;
		uwc->wr_id		= wqe->wr_id;
	} else {
		wc->status		= qp->resp.status;
		wc->qp			= &qp->ibqp;
		wc->wr_id		= wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
			      pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length :
					wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			uwc->qp_num		= qp->ibqp.qp_num;

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num		= qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
				struct rxe_mem *rmr;

				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);

				rmr = rxe_pool_get_index(&rxe->mr_pool,
							 wc->ex.invalidate_rkey >> 8);
				if (unlikely(!rmr)) {
					pr_err("Bad rkey %#x invalidation\n",
					       wc->ex.invalidate_rkey);
					return RESPST_ERROR;
				}
				rmr->state = RXE_MEM_STATE_FREE;
				rxe_drop_ref(rmr);
			}

			wc->qp			= &qp->ibqp;

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num		= qp->attr.port_num;
		}
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		advance_consumer(qp->rq.queue);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

	if (qp->resp.state == QP_STATE_ERROR)
		return RESPST_CHK_RESOURCE;

	if (!pkt)
		return RESPST_DONE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
		    u8 syndrome, u32 psn)
{
	int err = 0;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
				 0, psn, syndrome, NULL);
	if (!skb) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
	if (err)
		pr_err_ratelimited("Failed sending ack\n");

err1:
	return err;
}
static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
			   u8 syndrome)
{
	int rc = 0;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct resp_res *res;

	skb = prepare_ack_packet(qp, pkt, &ack_pkt,
				 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
				 syndrome, NULL);
	if (!skb) {
		rc = -ENOMEM;
		goto out;
	}

	rxe_add_ref(qp);

	res = &qp->resp.resources[qp->resp.res_head];
	free_rd_atomic_resource(qp, res);
	rxe_advance_resp_resource(qp);

	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
	memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
	       sizeof(skb->cb) - sizeof(ack_pkt));

	skb_get(skb);
	res->type = RXE_ATOMIC_MASK;
	res->atomic.skb = skb;
	res->first_psn = ack_pkt.psn;
	res->last_psn = ack_pkt.psn;
	res->cur_psn = ack_pkt.psn;

	rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
	if (rc) {
		pr_err_ratelimited("Failed sending ack\n");
		rxe_drop_ref(qp);
	}
out:
	return rc;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
	else if (bth_ack(pkt))
		send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}
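/*
 * Only RC QPs generate acks.  A pending NAK syndrome takes priority,
 * atomics are answered with an atomic ack carrying the original value, and
 * otherwise a plain ack is sent only when the requester set the AckReq bit
 * in the BTH, so unsolicited packets are not individually acked.
 */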
static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_drop_ref(qp);
		kfree_skb(skb);
	}

	if (qp->resp.mr) {
		rxe_drop_ref(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}
static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}
static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		if (bth_ack(pkt))
			send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error.  Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			skb_get(res->atomic.skb);
			/* Resend the result. */
			rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
					     pkt, res->atomic.skb);
			if (rc) {
				pr_err("Failed resending result. This flow is not handled - skb ignored\n");
				rc = RESPST_CLEANUP;
				goto out;
			}
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}
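/*
 * Duplicate handling in short: retried sends and writes are not re-executed,
 * only re-acked (C9-105); a retried read may cover the original range or a
 * subset of it and is replayed from the saved responder resource; a retried
 * atomic is answered by retransmitting the cached atomic-ack skb instead of
 * re-executing an operation that is not idempotent.
 */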
/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome	= syndrome;
	qp->resp.status		= status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error	= 1;
}
static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. reset the
		 * recv wr to its original state
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_drop_ref(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}
static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_drop_ref(qp);
		kfree_skb(skb);
	}

	if (notify)
		return;

	while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
		advance_consumer(qp->rq.queue);
}
int rxe_responder(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	switch (qp->resp.state) {
	case QP_STATE_RESET:
		state = RESPST_RESET;
		break;

	default:
		state = RESPST_GET_REQ;
		break;
	}

	while (1) {
		pr_debug("qp#%d state = %s\n", qp_num(qp),
			 resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;

		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, pkt, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All - Class A */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_RESET:
			rxe_drain_req_pkts(qp, false);
			qp->resp.wqe = NULL;
			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			pr_warn("qp#%d moved to error state\n", qp_num(qp));