/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *	Redistribution and use in source and binary forms, with or
 *	without modification, are permitted provided that the following
 *	conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_RESET,
	RESPST_DONE,
	RESPST_EXIT,
};
static char *resp_state_name[] = {
	[RESPST_NONE]				= "NONE",
	[RESPST_GET_REQ]			= "GET_REQ",
	[RESPST_CHK_PSN]			= "CHK_PSN",
	[RESPST_CHK_OP_SEQ]			= "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID]			= "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE]			= "CHK_RESOURCE",
	[RESPST_CHK_LENGTH]			= "CHK_LENGTH",
	[RESPST_CHK_RKEY]			= "CHK_RKEY",
	[RESPST_EXECUTE]			= "EXECUTE",
	[RESPST_READ_REPLY]			= "READ_REPLY",
	[RESPST_COMPLETE]			= "COMPLETE",
	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
	[RESPST_CLEANUP]			= "CLEANUP",
	[RESPST_DUPLICATE_REQUEST]		= "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE]		= "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE]		= "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC]		= "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ]		= "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST]	= "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C]	= "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E]	= "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ]	= "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR]			= "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION]		= "ERR_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH]			= "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW]		= "ERR_CQ_OVERFLOW",
	[RESPST_ERROR]				= "ERROR",
	[RESPST_RESET]				= "RESET",
	[RESPST_DONE]				= "DONE",
	[RESPST_EXIT]				= "EXIT",
};
/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_dev *rxe, struct rxe_qp *qp,
			struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
			(skb_queue_len(&qp->req_pkts) > 1);

	rxe_run_task(&qp->resp.task, must_sched);
}
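
/* Illustrative example of the must_sched heuristic above: an RDMA READ
 * request always defers to the tasklet, since building the reply may copy
 * a large amount of memory, and any backlog (skb_queue_len() > 1) defers
 * as well so packets keep draining in order; a lone inbound SEND is cheap
 * enough that the responder runs inline (must_sched == 0).
 */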
static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	if (qp->resp.state == QP_STATE_ERROR) {
		skb = skb_dequeue(&qp->req_pkts);
		if (skb) {
			/* drain request packet queue */
			rxe_drop_ref(qp);
			kfree_skb(skb);
			return RESPST_GET_REQ;
		}

		/* go drain recv wr queue */
		return RESPST_CHK_RESOURCE;
	}

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;

	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}
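
/* Worked example, assuming psn_compare() does 24-bit serial-number
 * arithmetic by shifting the difference into the sign bits, as elsewhere
 * in rxe:
 *
 *	psn_compare(0x000002, 0x000001) > 0	packet ahead of resp.psn
 *	psn_compare(0x000000, 0xffffff) > 0	still "ahead" across wrap
 *	psn_compare(0x000001, 0x000005) < 0	duplicate (already seen)
 *
 * so an RC responder NAKs once for diff > 0 and replays for diff < 0.
 */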
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
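
/* Illustrative sequencing enforced above: a valid 3-packet RC send is
 * FIRST, MIDDLE, LAST. Receiving MIDDLE when no FIRST has been seen
 * (qp->resp.opcode is not a FIRST/MIDDLE) yields
 * RESPST_ERR_MISSING_OPCODE_FIRST, while receiving SEND_FIRST again
 * right after SEND_FIRST falls through the inner default and yields
 * RESPST_ERR_MISSING_OPCODE_LAST_C on the RC path.
 */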
static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (((pkt->mask & RXE_READ_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
		    ((pkt->mask & RXE_WRITE_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
		    ((pkt->mask & RXE_ATOMIC_MASK) &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
			return RESPST_ERR_UNSUPPORTED_OPCODE;
		}

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	default:
		break;
	}

	return RESPST_CHK_RESOURCE;
}
static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_bh(&srq->rq.consumer_lock);

	wqe = queue_head(q);
	if (!wqe) {
		spin_unlock_bh(&srq->rq.consumer_lock);
		return RESPST_ERR_RNR;
	}

	/* note kernel and user space recv wqes have same size */
	memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	advance_consumer(q);

	if (srq->limit && srq->ibsrq.event_handler &&
	    (queue_count(q) < srq->limit)) {
		srq->limit = 0;
		goto limit_reached;
	}

	spin_unlock_bh(&srq->rq.consumer_lock);
	return RESPST_CHK_LENGTH;

limit_reached:
	spin_unlock_bh(&srq->rq.consumer_lock);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}
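
/* Illustrative: with srq->limit == 16, consuming the WQE that drops
 * queue_count(q) to 15 disarms the limit (cleared above) and fires a
 * single IB_EVENT_SRQ_LIMIT_REACHED through the consumer's event handler;
 * the consumer must rearm the limit to be notified again.
 */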
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (qp->resp.state == QP_STATE_ERROR) {
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_WR_FLUSH_ERR;
			return RESPST_COMPLETE;
		} else if (!srq) {
			qp->resp.wqe = queue_head(qp->rq.queue);
			if (qp->resp.wqe) {
				qp->resp.status = IB_WC_WR_FLUSH_ERR;
				return RESPST_COMPLETE;
			} else {
				return RESPST_EXIT;
			}
		} else {
			return RESPST_EXIT;
		}
	}

	if (pkt->mask & RXE_READ_OR_ATOMIC) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}
static enum resp_states check_length(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		return RESPST_CHK_RKEY;

	case IB_QPT_UC:
		return RESPST_CHK_RKEY;

	default:
		return RESPST_CHK_RKEY;
	}
}
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mem *mem = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access;

	if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK) {
			qp->resp.va = reth_va(pkt);
			qp->resp.rkey = reth_rkey(pkt);
			qp->resp.resid = reth_len(pkt);
		}
		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp->resp.va = atmeth_va(pkt);
		qp->resp.rkey = atmeth_rkey(pkt);
		qp->resp.resid = sizeof(u64);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		return RESPST_EXECUTE;
	}

	/* A zero-byte op is not required to set an addr or rkey. */
	if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
	    (pkt->mask & RXE_RETH_MASK) &&
	    reth_len(pkt) == 0) {
		return RESPST_EXECUTE;
	}

	va	= qp->resp.va;
	rkey	= qp->resp.rkey;
	resid	= qp->resp.resid;
	pktlen	= payload_size(pkt);

	mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
	if (!mem) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (mem_check_range(mem, va, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

	if (pkt->mask & RXE_WRITE_MASK) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}

			qp->resp.resid = mtu;
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mem;
	return RESPST_EXECUTE;

err:
	if (mem)
		rxe_drop_ref(mem);
	return state;
}
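
/* Worked example for the pad check above: a "last" write packet carrying
 * resid == 5 payload bytes must be padded to a 4-byte boundary, so
 * bth_pad(pkt) must equal 0x3 & (-5) == 3 (5 + 3 == 8 bytes on the wire);
 * any other pad count is treated as a malformed packet and maps to
 * RESPST_ERR_LENGTH.
 */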
static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, to_mem_obj, NULL);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}
static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
			   data_len, to_mem_obj, NULL);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);

static enum resp_states process_atomic(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	u64 iova = atmeth_va(pkt);
	u64 *vaddr;
	enum resp_states ret;
	struct rxe_mem *mr = qp->resp.mr;

	if (mr->state != RXE_MEM_STATE_VALID) {
		ret = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	vaddr = iova_to_vaddr(mr, iova, sizeof(u64));

	/* check vaddr is 8 bytes aligned. */
	if (!vaddr || (uintptr_t)vaddr & 7) {
		ret = RESPST_ERR_MISALIGNED_ATOMIC;
		goto out;
	}

	spin_lock_bh(&atomic_ops_lock);

	qp->resp.atomic_orig = *vaddr;

	if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
	    pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
		if (*vaddr == atmeth_comp(pkt))
			*vaddr = atmeth_swap_add(pkt);
	} else {
		*vaddr += atmeth_swap_add(pkt);
	}

	spin_unlock_bh(&atomic_ops_lock);

	ret = RESPST_NONE;
out:
	return ret;
}
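
/* Semantics of the critical section above, sketched in pseudo-C
 * (illustrative, not additional code):
 *
 *	orig = *vaddr;
 *	if (compare-and-swap)
 *		*vaddr = (orig == compare) ? swap : orig;
 *	else	// fetch-and-add
 *		*vaddr = orig + add;
 *
 * orig is saved in qp->resp.atomic_orig so the atomic ack can return the
 * value the remote peer observed before the update.
 */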
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome,
					  u32 *crcp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	u32 crc = 0;
	u32 *p;
	int paylen;
	int pad;
	int err;

	/* allocate packet */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->offset = pkt->offset;
	ack->paylen = paylen;

	/* fill in bth using the request packet headers */
	memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);

	bth_set_opcode(ack, opcode);
	bth_set_qpn(ack, qp->attr.dest_qp_num);
	bth_set_pad(ack, pad);
	bth_set_psn(ack, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.atomic_orig);

	err = rxe_prepare(rxe, ack, skb, &crc);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	if (crcp) {
		/* CRC computation will be continued by the caller */
		*crcp = crc;
	} else {
		p = payload_addr(ack) + payload + bth_pad(ack);
		*p = ~crc;
	}

	return skb;
}
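
/* Illustrative sizing: a bare RC ACK carries payload == 0, so pad == 0
 * and paylen is just rxe_opcode[opcode].length plus the 4-byte ICRC; a
 * read response carrying 5 payload bytes gets pad == (-5) & 0x3 == 3 so
 * the trailing ICRC stays 4-byte aligned.
 */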
/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	u32 icrc;
	u32 *p;

	if (!res) {
		/* This is the first time we process that request. Get a
		 * resource
		 */
		res = &qp->resp.resources[qp->resp.res_head];

		free_rd_atomic_resource(qp, res);
		rxe_advance_resp_resource(qp);

		res->type		= RXE_READ_MASK;

		res->read.va		= qp->resp.va;
		res->read.va_org	= qp->resp.va;

		res->first_psn		= req_pkt->psn;

		if (reth_len(req_pkt)) {
			res->last_psn	= (req_pkt->psn +
					   (reth_len(req_pkt) + mtu - 1) /
					   mtu - 1) & BTH_PSN_MASK;
		} else {
			res->last_psn	= res->first_psn;
		}
		res->cur_psn		= req_pkt->psn;

		res->read.resid		= qp->resp.resid;
		res->read.length	= qp->resp.resid;
		res->read.rkey		= qp->resp.rkey;

		/* note res inherits the reference to mr from qp */
		res->read.mr		= qp->resp.mr;
		qp->resp.mr		= NULL;

		qp->resp.res		= res;
		res->state		= rdatm_res_state_new;
	}

	if (res->state == rdatm_res_state_new) {
		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
	if (!skb)
		return RESPST_ERR_RNR;

	err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
			   payload, from_mem_obj, &icrc);
	if (err)
		pr_err("Failed copying memory\n");

	p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
	*p = ~icrc;

	err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
	if (err) {
		pr_err("Failed sending RDMA reply.\n");
		return RESPST_ERR_RNR;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

	return state;
}
*hdr
,
766 struct rxe_pkt_info
*pkt
)
768 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
770 memset(hdr
, 0, sizeof(*hdr
));
771 if (skb
->protocol
== htons(ETH_P_IP
))
772 memcpy(&hdr
->roce4grh
, ip_hdr(skb
), sizeof(hdr
->roce4grh
));
773 else if (skb
->protocol
== htons(ETH_P_IPV6
))
774 memcpy(&hdr
->ibgrh
, ipv6_hdr(skb
), sizeof(hdr
->ibgrh
));
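
/* Usage note (illustrative): execute() below copies this GRH-sized union
 * into the start of a UD/SMI/GSI receive buffer. Per the union's layout
 * in <rdma/ib_verbs.h>, an IPv4 header lands in the trailing bytes of the
 * GRH-sized area, while an IPv6 header fills it as a real GRH, matching
 * the verbs convention that UD receive buffers begin with the GRH.
 */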
/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_SMI ||
		    qp_type(qp) == IB_QPT_GSI) {
			union rdma_network_hdr hdr;

			build_rdma_network_hdr(&hdr, pkt);

			err = send_data_in(qp, &hdr, sizeof(hdr));
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		err = process_atomic(qp, pkt);
		if (err)
			return err;
	}

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK) {
		/* We successfully processed this new request. */
		qp->resp.msn++;
		return RESPST_COMPLETE;
	} else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;

	if (unlikely(!wqe))
		return RESPST_CLEANUP;

	memset(&cqe, 0, sizeof(cqe));

	wc->wr_id	= wqe->wr_id;
	wc->status	= qp->resp.status;

	/* fields after status are not required for errors */
	if (wc->status == IB_WC_SUCCESS) {
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data =
					(__u32 __force)immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			uwc->qp_num = qp->ibqp.qp_num;

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
				struct rxe_mem *rmr;

				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);

				rmr = rxe_pool_get_index(&rxe->mr_pool,
							 wc->ex.invalidate_rkey >> 8);
				if (unlikely(!rmr)) {
					pr_err("Bad rkey %#x invalidation\n",
					       wc->ex.invalidate_rkey);
					return RESPST_ERROR;
				}
				rmr->state = RXE_MEM_STATE_FREE;
				rxe_drop_ref(rmr);
			}

			wc->qp = &qp->ibqp;

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		advance_consumer(qp->rq.queue);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

	if (qp->resp.state == QP_STATE_ERROR)
		return RESPST_CHK_RESOURCE;

	if (!pkt)
		return RESPST_DONE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
		    u8 syndrome, u32 psn)
{
	int err = 0;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
				 0, psn, syndrome, NULL);
	if (!skb) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
	if (err)
		pr_err_ratelimited("Failed sending ack\n");

err1:
	return err;
}
static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
			   u8 syndrome)
{
	int rc = 0;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	struct sk_buff *skb_copy;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct resp_res *res;

	skb = prepare_ack_packet(qp, pkt, &ack_pkt,
				 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
				 syndrome, NULL);
	if (!skb) {
		rc = -ENOMEM;
		goto out;
	}

	skb_copy = skb_clone(skb, GFP_ATOMIC);
	if (skb_copy)
		rxe_add_ref(qp); /* for the new SKB */
	else {
		pr_warn("Could not clone atomic response\n");
		rc = -ENOMEM;
		goto out;
	}

	res = &qp->resp.resources[qp->resp.res_head];
	free_rd_atomic_resource(qp, res);
	rxe_advance_resp_resource(qp);

	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));

	res->type = RXE_ATOMIC_MASK;
	res->atomic.skb = skb;
	res->first_psn = ack_pkt.psn;
	res->last_psn = ack_pkt.psn;
	res->cur_psn = ack_pkt.psn;

	rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
	if (rc) {
		pr_err_ratelimited("Failed sending ack\n");
		rxe_drop_ref(qp);
		kfree_skb(skb_copy);
	}

out:
	return rc;
}
static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
	else if (bth_ack(pkt))
		send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}
static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_drop_ref(qp);
		kfree_skb(skb);
	}

	if (qp->resp.mr) {
		rxe_drop_ref(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}
static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}
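
/* Illustrative match: a read resource built for PSNs [100, 102] (a
 * three-packet reply) satisfies find_resource(qp, 101), so a duplicate
 * read request arriving mid-range can restart the reply from PSN 101
 * rather than from the beginning of the original transfer.
 */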
static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		if (bth_ack(pkt))
			send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			struct sk_buff *skb_copy;

			skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);
			if (skb_copy) {
				rxe_add_ref(qp); /* for the new SKB */
			} else {
				pr_warn("Couldn't clone atomic resp\n");
				rc = RESPST_CLEANUP;
				goto out;
			}

			/* Resend the result. */
			rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
					     pkt, skb_copy);
			if (rc) {
				pr_err("Failed resending result. This flow is not handled - skb ignored\n");
				rxe_drop_ref(qp);
				kfree_skb(skb_copy);
				rc = RESPST_CLEANUP;
				goto out;
			}
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}
/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}
static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_drop_ref(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}
void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_drop_ref(qp);
		kfree_skb(skb);
	}

	if (notify)
		return;

	while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
		advance_consumer(qp->rq.queue);
}
int rxe_responder(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret = 0;

	rxe_add_ref(qp);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	if (!qp->valid) {
		ret = -EINVAL;
		goto done;
	}

	switch (qp->resp.state) {
	case QP_STATE_RESET:
		state = RESPST_RESET;
		break;

	default:
		state = RESPST_GET_REQ;
		break;
	}

	while (1) {
		pr_debug("qp#%d state = %s\n", qp_num(qp),
			 resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR,
				 qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				/* RC - class B */
				send_ack(qp, pkt, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All - Class A */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_RESET:
			rxe_drain_req_pkts(qp, false);
			qp->resp.wqe = NULL;
			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			pr_warn("qp#%d moved to error state\n", qp_num(qp));
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

exit:
	ret = -EAGAIN;
done:
	rxe_drop_ref(qp);
	return ret;
}