/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

/*
 * States of the responder state machine.
 *
 * The order must stay in step with resp_state_name[], which is indexed
 * by these values.
 */
enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_RESET,
	RESPST_DONE,
	RESPST_EXIT,
};

73 static char *resp_state_name
[] = {
74 [RESPST_NONE
] = "NONE",
75 [RESPST_GET_REQ
] = "GET_REQ",
76 [RESPST_CHK_PSN
] = "CHK_PSN",
77 [RESPST_CHK_OP_SEQ
] = "CHK_OP_SEQ",
78 [RESPST_CHK_OP_VALID
] = "CHK_OP_VALID",
79 [RESPST_CHK_RESOURCE
] = "CHK_RESOURCE",
80 [RESPST_CHK_LENGTH
] = "CHK_LENGTH",
81 [RESPST_CHK_RKEY
] = "CHK_RKEY",
82 [RESPST_EXECUTE
] = "EXECUTE",
83 [RESPST_READ_REPLY
] = "READ_REPLY",
84 [RESPST_COMPLETE
] = "COMPLETE",
85 [RESPST_ACKNOWLEDGE
] = "ACKNOWLEDGE",
86 [RESPST_CLEANUP
] = "CLEANUP",
87 [RESPST_DUPLICATE_REQUEST
] = "DUPLICATE_REQUEST",
88 [RESPST_ERR_MALFORMED_WQE
] = "ERR_MALFORMED_WQE",
89 [RESPST_ERR_UNSUPPORTED_OPCODE
] = "ERR_UNSUPPORTED_OPCODE",
90 [RESPST_ERR_MISALIGNED_ATOMIC
] = "ERR_MISALIGNED_ATOMIC",
91 [RESPST_ERR_PSN_OUT_OF_SEQ
] = "ERR_PSN_OUT_OF_SEQ",
92 [RESPST_ERR_MISSING_OPCODE_FIRST
] = "ERR_MISSING_OPCODE_FIRST",
93 [RESPST_ERR_MISSING_OPCODE_LAST_C
] = "ERR_MISSING_OPCODE_LAST_C",
94 [RESPST_ERR_MISSING_OPCODE_LAST_D1E
] = "ERR_MISSING_OPCODE_LAST_D1E",
95 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
] = "ERR_TOO_MANY_RDMA_ATM_REQ",
96 [RESPST_ERR_RNR
] = "ERR_RNR",
97 [RESPST_ERR_RKEY_VIOLATION
] = "ERR_RKEY_VIOLATION",
98 [RESPST_ERR_LENGTH
] = "ERR_LENGTH",
99 [RESPST_ERR_CQ_OVERFLOW
] = "ERR_CQ_OVERFLOW",
100 [RESPST_ERROR
] = "ERROR",
101 [RESPST_RESET
] = "RESET",
102 [RESPST_DONE
] = "DONE",
103 [RESPST_EXIT
] = "EXIT",
106 /* rxe_recv calls here to add a request packet to the input queue */
107 void rxe_resp_queue_pkt(struct rxe_dev
*rxe
, struct rxe_qp
*qp
,
111 struct rxe_pkt_info
*pkt
= SKB_TO_PKT(skb
);
113 skb_queue_tail(&qp
->req_pkts
, skb
);
115 must_sched
= (pkt
->opcode
== IB_OPCODE_RC_RDMA_READ_REQUEST
) ||
116 (skb_queue_len(&qp
->req_pkts
) > 1);
118 rxe_run_task(&qp
->resp
.task
, must_sched
);
121 static inline enum resp_states
get_req(struct rxe_qp
*qp
,
122 struct rxe_pkt_info
**pkt_p
)
126 if (qp
->resp
.state
== QP_STATE_ERROR
) {
127 skb
= skb_dequeue(&qp
->req_pkts
);
129 /* drain request packet queue */
132 return RESPST_GET_REQ
;
135 /* go drain recv wr queue */
136 return RESPST_CHK_RESOURCE
;
139 skb
= skb_peek(&qp
->req_pkts
);
143 *pkt_p
= SKB_TO_PKT(skb
);
145 return (qp
->resp
.res
) ? RESPST_READ_REPLY
: RESPST_CHK_PSN
;
148 static enum resp_states
check_psn(struct rxe_qp
*qp
,
149 struct rxe_pkt_info
*pkt
)
151 int diff
= psn_compare(pkt
->psn
, qp
->resp
.psn
);
152 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
154 switch (qp_type(qp
)) {
157 if (qp
->resp
.sent_psn_nak
)
158 return RESPST_CLEANUP
;
160 qp
->resp
.sent_psn_nak
= 1;
161 rxe_counter_inc(rxe
, RXE_CNT_OUT_OF_SEQ_REQ
);
162 return RESPST_ERR_PSN_OUT_OF_SEQ
;
164 } else if (diff
< 0) {
165 rxe_counter_inc(rxe
, RXE_CNT_DUP_REQ
);
166 return RESPST_DUPLICATE_REQUEST
;
169 if (qp
->resp
.sent_psn_nak
)
170 qp
->resp
.sent_psn_nak
= 0;
175 if (qp
->resp
.drop_msg
|| diff
!= 0) {
176 if (pkt
->mask
& RXE_START_MASK
) {
177 qp
->resp
.drop_msg
= 0;
178 return RESPST_CHK_OP_SEQ
;
181 qp
->resp
.drop_msg
= 1;
182 return RESPST_CLEANUP
;
189 return RESPST_CHK_OP_SEQ
;
192 static enum resp_states
check_op_seq(struct rxe_qp
*qp
,
193 struct rxe_pkt_info
*pkt
)
195 switch (qp_type(qp
)) {
197 switch (qp
->resp
.opcode
) {
198 case IB_OPCODE_RC_SEND_FIRST
:
199 case IB_OPCODE_RC_SEND_MIDDLE
:
200 switch (pkt
->opcode
) {
201 case IB_OPCODE_RC_SEND_MIDDLE
:
202 case IB_OPCODE_RC_SEND_LAST
:
203 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
204 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
205 return RESPST_CHK_OP_VALID
;
207 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
210 case IB_OPCODE_RC_RDMA_WRITE_FIRST
:
211 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
212 switch (pkt
->opcode
) {
213 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
214 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
215 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
216 return RESPST_CHK_OP_VALID
;
218 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
222 switch (pkt
->opcode
) {
223 case IB_OPCODE_RC_SEND_MIDDLE
:
224 case IB_OPCODE_RC_SEND_LAST
:
225 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
226 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
227 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
228 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
229 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
230 return RESPST_ERR_MISSING_OPCODE_FIRST
;
232 return RESPST_CHK_OP_VALID
;
238 switch (qp
->resp
.opcode
) {
239 case IB_OPCODE_UC_SEND_FIRST
:
240 case IB_OPCODE_UC_SEND_MIDDLE
:
241 switch (pkt
->opcode
) {
242 case IB_OPCODE_UC_SEND_MIDDLE
:
243 case IB_OPCODE_UC_SEND_LAST
:
244 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
245 return RESPST_CHK_OP_VALID
;
247 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
250 case IB_OPCODE_UC_RDMA_WRITE_FIRST
:
251 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
252 switch (pkt
->opcode
) {
253 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
254 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
255 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
256 return RESPST_CHK_OP_VALID
;
258 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
262 switch (pkt
->opcode
) {
263 case IB_OPCODE_UC_SEND_MIDDLE
:
264 case IB_OPCODE_UC_SEND_LAST
:
265 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
266 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
267 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
268 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
269 qp
->resp
.drop_msg
= 1;
270 return RESPST_CLEANUP
;
272 return RESPST_CHK_OP_VALID
;
278 return RESPST_CHK_OP_VALID
;
282 static enum resp_states
check_op_valid(struct rxe_qp
*qp
,
283 struct rxe_pkt_info
*pkt
)
285 switch (qp_type(qp
)) {
287 if (((pkt
->mask
& RXE_READ_MASK
) &&
288 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_READ
)) ||
289 ((pkt
->mask
& RXE_WRITE_MASK
) &&
290 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) ||
291 ((pkt
->mask
& RXE_ATOMIC_MASK
) &&
292 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_ATOMIC
))) {
293 return RESPST_ERR_UNSUPPORTED_OPCODE
;
299 if ((pkt
->mask
& RXE_WRITE_MASK
) &&
300 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) {
301 qp
->resp
.drop_msg
= 1;
302 return RESPST_CLEANUP
;
317 return RESPST_CHK_RESOURCE
;
320 static enum resp_states
get_srq_wqe(struct rxe_qp
*qp
)
322 struct rxe_srq
*srq
= qp
->srq
;
323 struct rxe_queue
*q
= srq
->rq
.queue
;
324 struct rxe_recv_wqe
*wqe
;
328 return RESPST_ERR_RNR
;
330 spin_lock_bh(&srq
->rq
.consumer_lock
);
334 spin_unlock_bh(&srq
->rq
.consumer_lock
);
335 return RESPST_ERR_RNR
;
338 /* note kernel and user space recv wqes have same size */
339 memcpy(&qp
->resp
.srq_wqe
, wqe
, sizeof(qp
->resp
.srq_wqe
));
341 qp
->resp
.wqe
= &qp
->resp
.srq_wqe
.wqe
;
344 if (srq
->limit
&& srq
->ibsrq
.event_handler
&&
345 (queue_count(q
) < srq
->limit
)) {
350 spin_unlock_bh(&srq
->rq
.consumer_lock
);
351 return RESPST_CHK_LENGTH
;
354 spin_unlock_bh(&srq
->rq
.consumer_lock
);
355 ev
.device
= qp
->ibqp
.device
;
356 ev
.element
.srq
= qp
->ibqp
.srq
;
357 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
358 srq
->ibsrq
.event_handler(&ev
, srq
->ibsrq
.srq_context
);
359 return RESPST_CHK_LENGTH
;
362 static enum resp_states
check_resource(struct rxe_qp
*qp
,
363 struct rxe_pkt_info
*pkt
)
365 struct rxe_srq
*srq
= qp
->srq
;
367 if (qp
->resp
.state
== QP_STATE_ERROR
) {
369 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
370 return RESPST_COMPLETE
;
372 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
374 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
375 return RESPST_COMPLETE
;
384 if (pkt
->mask
& RXE_READ_OR_ATOMIC
) {
385 /* it is the requesters job to not send
386 * too many read/atomic ops, we just
387 * recycle the responder resource queue
389 if (likely(qp
->attr
.max_dest_rd_atomic
> 0))
390 return RESPST_CHK_LENGTH
;
392 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
;
395 if (pkt
->mask
& RXE_RWR_MASK
) {
397 return get_srq_wqe(qp
);
399 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
400 return (qp
->resp
.wqe
) ? RESPST_CHK_LENGTH
: RESPST_ERR_RNR
;
403 return RESPST_CHK_LENGTH
;
406 static enum resp_states
check_length(struct rxe_qp
*qp
,
407 struct rxe_pkt_info
*pkt
)
409 switch (qp_type(qp
)) {
411 return RESPST_CHK_RKEY
;
414 return RESPST_CHK_RKEY
;
417 return RESPST_CHK_RKEY
;
421 static enum resp_states
check_rkey(struct rxe_qp
*qp
,
422 struct rxe_pkt_info
*pkt
)
424 struct rxe_mem
*mem
= NULL
;
430 enum resp_states state
;
433 if (pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_MASK
)) {
434 if (pkt
->mask
& RXE_RETH_MASK
) {
435 qp
->resp
.va
= reth_va(pkt
);
436 qp
->resp
.rkey
= reth_rkey(pkt
);
437 qp
->resp
.resid
= reth_len(pkt
);
439 access
= (pkt
->mask
& RXE_READ_MASK
) ? IB_ACCESS_REMOTE_READ
440 : IB_ACCESS_REMOTE_WRITE
;
441 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
442 qp
->resp
.va
= atmeth_va(pkt
);
443 qp
->resp
.rkey
= atmeth_rkey(pkt
);
444 qp
->resp
.resid
= sizeof(u64
);
445 access
= IB_ACCESS_REMOTE_ATOMIC
;
447 return RESPST_EXECUTE
;
450 /* A zero-byte op is not required to set an addr or rkey. */
451 if ((pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_OR_SEND
)) &&
452 (pkt
->mask
& RXE_RETH_MASK
) &&
453 reth_len(pkt
) == 0) {
454 return RESPST_EXECUTE
;
458 rkey
= qp
->resp
.rkey
;
459 resid
= qp
->resp
.resid
;
460 pktlen
= payload_size(pkt
);
462 mem
= lookup_mem(qp
->pd
, access
, rkey
, lookup_remote
);
464 state
= RESPST_ERR_RKEY_VIOLATION
;
468 if (unlikely(mem
->state
== RXE_MEM_STATE_FREE
)) {
469 state
= RESPST_ERR_RKEY_VIOLATION
;
473 if (mem_check_range(mem
, va
, resid
)) {
474 state
= RESPST_ERR_RKEY_VIOLATION
;
478 if (pkt
->mask
& RXE_WRITE_MASK
) {
480 if (pktlen
!= mtu
|| bth_pad(pkt
)) {
481 state
= RESPST_ERR_LENGTH
;
485 if (pktlen
!= resid
) {
486 state
= RESPST_ERR_LENGTH
;
489 if ((bth_pad(pkt
) != (0x3 & (-resid
)))) {
490 /* This case may not be exactly that
491 * but nothing else fits.
493 state
= RESPST_ERR_LENGTH
;
499 WARN_ON_ONCE(qp
->resp
.mr
);
502 return RESPST_EXECUTE
;
510 static enum resp_states
send_data_in(struct rxe_qp
*qp
, void *data_addr
,
514 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
516 err
= copy_data(rxe
, qp
->pd
, IB_ACCESS_LOCAL_WRITE
, &qp
->resp
.wqe
->dma
,
517 data_addr
, data_len
, to_mem_obj
, NULL
);
519 return (err
== -ENOSPC
) ? RESPST_ERR_LENGTH
520 : RESPST_ERR_MALFORMED_WQE
;
525 static enum resp_states
write_data_in(struct rxe_qp
*qp
,
526 struct rxe_pkt_info
*pkt
)
528 enum resp_states rc
= RESPST_NONE
;
530 int data_len
= payload_size(pkt
);
532 err
= rxe_mem_copy(qp
->resp
.mr
, qp
->resp
.va
, payload_addr(pkt
),
533 data_len
, to_mem_obj
, NULL
);
535 rc
= RESPST_ERR_RKEY_VIOLATION
;
539 qp
->resp
.va
+= data_len
;
540 qp
->resp
.resid
-= data_len
;
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);

549 static enum resp_states
process_atomic(struct rxe_qp
*qp
,
550 struct rxe_pkt_info
*pkt
)
552 u64 iova
= atmeth_va(pkt
);
554 enum resp_states ret
;
555 struct rxe_mem
*mr
= qp
->resp
.mr
;
557 if (mr
->state
!= RXE_MEM_STATE_VALID
) {
558 ret
= RESPST_ERR_RKEY_VIOLATION
;
562 vaddr
= iova_to_vaddr(mr
, iova
, sizeof(u64
));
564 /* check vaddr is 8 bytes aligned. */
565 if (!vaddr
|| (uintptr_t)vaddr
& 7) {
566 ret
= RESPST_ERR_MISALIGNED_ATOMIC
;
570 spin_lock_bh(&atomic_ops_lock
);
572 qp
->resp
.atomic_orig
= *vaddr
;
574 if (pkt
->opcode
== IB_OPCODE_RC_COMPARE_SWAP
||
575 pkt
->opcode
== IB_OPCODE_RD_COMPARE_SWAP
) {
576 if (*vaddr
== atmeth_comp(pkt
))
577 *vaddr
= atmeth_swap_add(pkt
);
579 *vaddr
+= atmeth_swap_add(pkt
);
582 spin_unlock_bh(&atomic_ops_lock
);
589 static struct sk_buff
*prepare_ack_packet(struct rxe_qp
*qp
,
590 struct rxe_pkt_info
*pkt
,
591 struct rxe_pkt_info
*ack
,
598 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
609 pad
= (-payload
) & 0x3;
610 paylen
= rxe_opcode
[opcode
].length
+ payload
+ pad
+ RXE_ICRC_SIZE
;
612 skb
= rxe_init_packet(rxe
, &qp
->pri_av
, paylen
, ack
);
617 ack
->opcode
= opcode
;
618 ack
->mask
= rxe_opcode
[opcode
].mask
;
619 ack
->offset
= pkt
->offset
;
620 ack
->paylen
= paylen
;
622 /* fill in bth using the request packet headers */
623 memcpy(ack
->hdr
, pkt
->hdr
, pkt
->offset
+ RXE_BTH_BYTES
);
625 bth_set_opcode(ack
, opcode
);
626 bth_set_qpn(ack
, qp
->attr
.dest_qp_num
);
627 bth_set_pad(ack
, pad
);
629 bth_set_psn(ack
, psn
);
633 if (ack
->mask
& RXE_AETH_MASK
) {
634 aeth_set_syn(ack
, syndrome
);
635 aeth_set_msn(ack
, qp
->resp
.msn
);
638 if (ack
->mask
& RXE_ATMACK_MASK
)
639 atmack_set_orig(ack
, qp
->resp
.atomic_orig
);
641 err
= rxe_prepare(rxe
, ack
, skb
, &crc
);
648 /* CRC computation will be continued by the caller */
651 p
= payload_addr(ack
) + payload
+ bth_pad(ack
);
658 /* RDMA read response. If res is not NULL, then we have a current RDMA request
659 * being processed or replayed.
661 static enum resp_states
read_reply(struct rxe_qp
*qp
,
662 struct rxe_pkt_info
*req_pkt
)
664 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
665 struct rxe_pkt_info ack_pkt
;
668 enum resp_states state
;
672 struct resp_res
*res
= qp
->resp
.res
;
677 /* This is the first time we process that request. Get a
680 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
682 free_rd_atomic_resource(qp
, res
);
683 rxe_advance_resp_resource(qp
);
685 res
->type
= RXE_READ_MASK
;
687 res
->read
.va
= qp
->resp
.va
;
688 res
->read
.va_org
= qp
->resp
.va
;
690 res
->first_psn
= req_pkt
->psn
;
692 if (reth_len(req_pkt
)) {
693 res
->last_psn
= (req_pkt
->psn
+
694 (reth_len(req_pkt
) + mtu
- 1) /
695 mtu
- 1) & BTH_PSN_MASK
;
697 res
->last_psn
= res
->first_psn
;
699 res
->cur_psn
= req_pkt
->psn
;
701 res
->read
.resid
= qp
->resp
.resid
;
702 res
->read
.length
= qp
->resp
.resid
;
703 res
->read
.rkey
= qp
->resp
.rkey
;
705 /* note res inherits the reference to mr from qp */
706 res
->read
.mr
= qp
->resp
.mr
;
710 res
->state
= rdatm_res_state_new
;
713 if (res
->state
== rdatm_res_state_new
) {
714 if (res
->read
.resid
<= mtu
)
715 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY
;
717 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
;
719 if (res
->read
.resid
> mtu
)
720 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
;
722 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
;
725 res
->state
= rdatm_res_state_next
;
727 payload
= min_t(int, res
->read
.resid
, mtu
);
729 skb
= prepare_ack_packet(qp
, req_pkt
, &ack_pkt
, opcode
, payload
,
730 res
->cur_psn
, AETH_ACK_UNLIMITED
, &icrc
);
732 return RESPST_ERR_RNR
;
734 err
= rxe_mem_copy(res
->read
.mr
, res
->read
.va
, payload_addr(&ack_pkt
),
735 payload
, from_mem_obj
, &icrc
);
737 pr_err("Failed copying memory\n");
739 p
= payload_addr(&ack_pkt
) + payload
+ bth_pad(&ack_pkt
);
742 err
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb
);
744 pr_err("Failed sending RDMA reply.\n");
746 return RESPST_ERR_RNR
;
749 res
->read
.va
+= payload
;
750 res
->read
.resid
-= payload
;
751 res
->cur_psn
= (res
->cur_psn
+ 1) & BTH_PSN_MASK
;
753 if (res
->read
.resid
> 0) {
757 qp
->resp
.opcode
= -1;
758 if (psn_compare(res
->cur_psn
, qp
->resp
.psn
) >= 0)
759 qp
->resp
.psn
= res
->cur_psn
;
760 state
= RESPST_CLEANUP
;
766 static void build_rdma_network_hdr(union rdma_network_hdr
*hdr
,
767 struct rxe_pkt_info
*pkt
)
769 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
771 memset(hdr
, 0, sizeof(*hdr
));
772 if (skb
->protocol
== htons(ETH_P_IP
))
773 memcpy(&hdr
->roce4grh
, ip_hdr(skb
), sizeof(hdr
->roce4grh
));
774 else if (skb
->protocol
== htons(ETH_P_IPV6
))
775 memcpy(&hdr
->ibgrh
, ipv6_hdr(skb
), sizeof(hdr
->ibgrh
));
778 /* Executes a new request. A retried request never reach that function (send
779 * and writes are discarded, and reads and atomics are retried elsewhere.
781 static enum resp_states
execute(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
)
783 enum resp_states err
;
785 if (pkt
->mask
& RXE_SEND_MASK
) {
786 if (qp_type(qp
) == IB_QPT_UD
||
787 qp_type(qp
) == IB_QPT_SMI
||
788 qp_type(qp
) == IB_QPT_GSI
) {
789 union rdma_network_hdr hdr
;
791 build_rdma_network_hdr(&hdr
, pkt
);
793 err
= send_data_in(qp
, &hdr
, sizeof(hdr
));
797 err
= send_data_in(qp
, payload_addr(pkt
), payload_size(pkt
));
800 } else if (pkt
->mask
& RXE_WRITE_MASK
) {
801 err
= write_data_in(qp
, pkt
);
804 } else if (pkt
->mask
& RXE_READ_MASK
) {
805 /* For RDMA Read we can increment the msn now. See C9-148. */
807 return RESPST_READ_REPLY
;
808 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
809 err
= process_atomic(qp
, pkt
);
817 /* next expected psn, read handles this separately */
818 qp
->resp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
820 qp
->resp
.opcode
= pkt
->opcode
;
821 qp
->resp
.status
= IB_WC_SUCCESS
;
823 if (pkt
->mask
& RXE_COMP_MASK
) {
824 /* We successfully processed this new request. */
826 return RESPST_COMPLETE
;
827 } else if (qp_type(qp
) == IB_QPT_RC
)
828 return RESPST_ACKNOWLEDGE
;
830 return RESPST_CLEANUP
;
833 static enum resp_states
do_complete(struct rxe_qp
*qp
,
834 struct rxe_pkt_info
*pkt
)
837 struct ib_wc
*wc
= &cqe
.ibwc
;
838 struct ib_uverbs_wc
*uwc
= &cqe
.uibwc
;
839 struct rxe_recv_wqe
*wqe
= qp
->resp
.wqe
;
842 return RESPST_CLEANUP
;
844 memset(&cqe
, 0, sizeof(cqe
));
846 wc
->wr_id
= wqe
->wr_id
;
847 wc
->status
= qp
->resp
.status
;
850 /* fields after status are not required for errors */
851 if (wc
->status
== IB_WC_SUCCESS
) {
852 wc
->opcode
= (pkt
->mask
& RXE_IMMDT_MASK
&&
853 pkt
->mask
& RXE_WRITE_MASK
) ?
854 IB_WC_RECV_RDMA_WITH_IMM
: IB_WC_RECV
;
856 wc
->byte_len
= wqe
->dma
.length
- wqe
->dma
.resid
;
858 /* fields after byte_len are different between kernel and user
861 if (qp
->rcq
->is_user
) {
862 uwc
->wc_flags
= IB_WC_GRH
;
864 if (pkt
->mask
& RXE_IMMDT_MASK
) {
865 uwc
->wc_flags
|= IB_WC_WITH_IMM
;
866 uwc
->ex
.imm_data
= immdt_imm(pkt
);
869 if (pkt
->mask
& RXE_IETH_MASK
) {
870 uwc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
871 uwc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
874 uwc
->qp_num
= qp
->ibqp
.qp_num
;
876 if (pkt
->mask
& RXE_DETH_MASK
)
877 uwc
->src_qp
= deth_sqp(pkt
);
879 uwc
->port_num
= qp
->attr
.port_num
;
881 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
883 wc
->wc_flags
= IB_WC_GRH
| IB_WC_WITH_NETWORK_HDR_TYPE
;
884 if (skb
->protocol
== htons(ETH_P_IP
))
885 wc
->network_hdr_type
= RDMA_NETWORK_IPV4
;
887 wc
->network_hdr_type
= RDMA_NETWORK_IPV6
;
889 if (pkt
->mask
& RXE_IMMDT_MASK
) {
890 wc
->wc_flags
|= IB_WC_WITH_IMM
;
891 wc
->ex
.imm_data
= immdt_imm(pkt
);
894 if (pkt
->mask
& RXE_IETH_MASK
) {
895 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
898 wc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
899 wc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
901 rmr
= rxe_pool_get_index(&rxe
->mr_pool
,
902 wc
->ex
.invalidate_rkey
>> 8);
903 if (unlikely(!rmr
)) {
904 pr_err("Bad rkey %#x invalidation\n",
905 wc
->ex
.invalidate_rkey
);
908 rmr
->state
= RXE_MEM_STATE_FREE
;
914 if (pkt
->mask
& RXE_DETH_MASK
)
915 wc
->src_qp
= deth_sqp(pkt
);
917 wc
->port_num
= qp
->attr
.port_num
;
921 /* have copy for srq and reference for !srq */
923 advance_consumer(qp
->rq
.queue
);
927 if (rxe_cq_post(qp
->rcq
, &cqe
, pkt
? bth_se(pkt
) : 1))
928 return RESPST_ERR_CQ_OVERFLOW
;
930 if (qp
->resp
.state
== QP_STATE_ERROR
)
931 return RESPST_CHK_RESOURCE
;
935 else if (qp_type(qp
) == IB_QPT_RC
)
936 return RESPST_ACKNOWLEDGE
;
938 return RESPST_CLEANUP
;
941 static int send_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
942 u8 syndrome
, u32 psn
)
945 struct rxe_pkt_info ack_pkt
;
947 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
949 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
, IB_OPCODE_RC_ACKNOWLEDGE
,
950 0, psn
, syndrome
, NULL
);
956 err
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb
);
958 pr_err_ratelimited("Failed sending ack\n");
966 static int send_atomic_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
970 struct rxe_pkt_info ack_pkt
;
972 struct sk_buff
*skb_copy
;
973 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
974 struct resp_res
*res
;
976 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
,
977 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE
, 0, pkt
->psn
,
984 skb_copy
= skb_clone(skb
, GFP_ATOMIC
);
986 rxe_add_ref(qp
); /* for the new SKB */
988 pr_warn("Could not clone atomic response\n");
993 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
994 free_rd_atomic_resource(qp
, res
);
995 rxe_advance_resp_resource(qp
);
997 memcpy(SKB_TO_PKT(skb
), &ack_pkt
, sizeof(ack_pkt
));
998 memset((unsigned char *)SKB_TO_PKT(skb
) + sizeof(ack_pkt
), 0,
999 sizeof(skb
->cb
) - sizeof(ack_pkt
));
1001 res
->type
= RXE_ATOMIC_MASK
;
1002 res
->atomic
.skb
= skb
;
1003 res
->first_psn
= ack_pkt
.psn
;
1004 res
->last_psn
= ack_pkt
.psn
;
1005 res
->cur_psn
= ack_pkt
.psn
;
1007 rc
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb_copy
);
1009 pr_err_ratelimited("Failed sending ack\n");
1011 kfree_skb(skb_copy
);
1018 static enum resp_states
acknowledge(struct rxe_qp
*qp
,
1019 struct rxe_pkt_info
*pkt
)
1021 if (qp_type(qp
) != IB_QPT_RC
)
1022 return RESPST_CLEANUP
;
1024 if (qp
->resp
.aeth_syndrome
!= AETH_ACK_UNLIMITED
)
1025 send_ack(qp
, pkt
, qp
->resp
.aeth_syndrome
, pkt
->psn
);
1026 else if (pkt
->mask
& RXE_ATOMIC_MASK
)
1027 send_atomic_ack(qp
, pkt
, AETH_ACK_UNLIMITED
);
1028 else if (bth_ack(pkt
))
1029 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, pkt
->psn
);
1031 return RESPST_CLEANUP
;
1034 static enum resp_states
cleanup(struct rxe_qp
*qp
,
1035 struct rxe_pkt_info
*pkt
)
1037 struct sk_buff
*skb
;
1040 skb
= skb_dequeue(&qp
->req_pkts
);
1046 rxe_drop_ref(qp
->resp
.mr
);
1053 static struct resp_res
*find_resource(struct rxe_qp
*qp
, u32 psn
)
1057 for (i
= 0; i
< qp
->attr
.max_dest_rd_atomic
; i
++) {
1058 struct resp_res
*res
= &qp
->resp
.resources
[i
];
1063 if (psn_compare(psn
, res
->first_psn
) >= 0 &&
1064 psn_compare(psn
, res
->last_psn
) <= 0) {
1072 static enum resp_states
duplicate_request(struct rxe_qp
*qp
,
1073 struct rxe_pkt_info
*pkt
)
1075 enum resp_states rc
;
1076 u32 prev_psn
= (qp
->resp
.psn
- 1) & BTH_PSN_MASK
;
1078 if (pkt
->mask
& RXE_SEND_MASK
||
1079 pkt
->mask
& RXE_WRITE_MASK
) {
1080 /* SEND. Ack again and cleanup. C9-105. */
1082 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, prev_psn
);
1083 rc
= RESPST_CLEANUP
;
1085 } else if (pkt
->mask
& RXE_READ_MASK
) {
1086 struct resp_res
*res
;
1088 res
= find_resource(qp
, pkt
->psn
);
1090 /* Resource not found. Class D error. Drop the
1093 rc
= RESPST_CLEANUP
;
1096 /* Ensure this new request is the same as the previous
1097 * one or a subset of it.
1099 u64 iova
= reth_va(pkt
);
1100 u32 resid
= reth_len(pkt
);
1102 if (iova
< res
->read
.va_org
||
1103 resid
> res
->read
.length
||
1104 (iova
+ resid
) > (res
->read
.va_org
+
1105 res
->read
.length
)) {
1106 rc
= RESPST_CLEANUP
;
1110 if (reth_rkey(pkt
) != res
->read
.rkey
) {
1111 rc
= RESPST_CLEANUP
;
1115 res
->cur_psn
= pkt
->psn
;
1116 res
->state
= (pkt
->psn
== res
->first_psn
) ?
1117 rdatm_res_state_new
:
1118 rdatm_res_state_replay
;
1120 /* Reset the resource, except length. */
1121 res
->read
.va_org
= iova
;
1122 res
->read
.va
= iova
;
1123 res
->read
.resid
= resid
;
1125 /* Replay the RDMA read reply. */
1127 rc
= RESPST_READ_REPLY
;
1131 struct resp_res
*res
;
1133 /* Find the operation in our list of responder resources. */
1134 res
= find_resource(qp
, pkt
->psn
);
1136 struct sk_buff
*skb_copy
;
1138 skb_copy
= skb_clone(res
->atomic
.skb
, GFP_ATOMIC
);
1140 rxe_add_ref(qp
); /* for the new SKB */
1142 pr_warn("Couldn't clone atomic resp\n");
1143 rc
= RESPST_CLEANUP
;
1147 /* Resend the result. */
1148 rc
= rxe_xmit_packet(to_rdev(qp
->ibqp
.device
), qp
,
1151 pr_err("Failed resending result. This flow is not handled - skb ignored\n");
1153 kfree_skb(skb_copy
);
1154 rc
= RESPST_CLEANUP
;
1159 /* Resource not found. Class D error. Drop the request. */
1160 rc
= RESPST_CLEANUP
;
1167 /* Process a class A or C. Both are treated the same in this implementation. */
1168 static void do_class_ac_error(struct rxe_qp
*qp
, u8 syndrome
,
1169 enum ib_wc_status status
)
1171 qp
->resp
.aeth_syndrome
= syndrome
;
1172 qp
->resp
.status
= status
;
1174 /* indicate that we should go through the ERROR state */
1175 qp
->resp
.goto_error
= 1;
1178 static enum resp_states
do_class_d1e_error(struct rxe_qp
*qp
)
1183 qp
->resp
.drop_msg
= 1;
1185 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1186 return RESPST_COMPLETE
;
1188 return RESPST_CLEANUP
;
1191 /* Class D1. This packet may be the start of a
1192 * new message and could be valid. The previous
1193 * message is invalid and ignored. reset the
1194 * recv wr to its original state
1197 qp
->resp
.wqe
->dma
.resid
= qp
->resp
.wqe
->dma
.length
;
1198 qp
->resp
.wqe
->dma
.cur_sge
= 0;
1199 qp
->resp
.wqe
->dma
.sge_offset
= 0;
1200 qp
->resp
.opcode
= -1;
1204 rxe_drop_ref(qp
->resp
.mr
);
1208 return RESPST_CLEANUP
;
1212 static void rxe_drain_req_pkts(struct rxe_qp
*qp
, bool notify
)
1214 struct sk_buff
*skb
;
1216 while ((skb
= skb_dequeue(&qp
->req_pkts
))) {
1224 while (!qp
->srq
&& qp
->rq
.queue
&& queue_head(qp
->rq
.queue
))
1225 advance_consumer(qp
->rq
.queue
);
1228 int rxe_responder(void *arg
)
1230 struct rxe_qp
*qp
= (struct rxe_qp
*)arg
;
1231 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
1232 enum resp_states state
;
1233 struct rxe_pkt_info
*pkt
= NULL
;
1238 qp
->resp
.aeth_syndrome
= AETH_ACK_UNLIMITED
;
1245 switch (qp
->resp
.state
) {
1246 case QP_STATE_RESET
:
1247 state
= RESPST_RESET
;
1251 state
= RESPST_GET_REQ
;
1256 pr_debug("qp#%d state = %s\n", qp_num(qp
),
1257 resp_state_name
[state
]);
1259 case RESPST_GET_REQ
:
1260 state
= get_req(qp
, &pkt
);
1262 case RESPST_CHK_PSN
:
1263 state
= check_psn(qp
, pkt
);
1265 case RESPST_CHK_OP_SEQ
:
1266 state
= check_op_seq(qp
, pkt
);
1268 case RESPST_CHK_OP_VALID
:
1269 state
= check_op_valid(qp
, pkt
);
1271 case RESPST_CHK_RESOURCE
:
1272 state
= check_resource(qp
, pkt
);
1274 case RESPST_CHK_LENGTH
:
1275 state
= check_length(qp
, pkt
);
1277 case RESPST_CHK_RKEY
:
1278 state
= check_rkey(qp
, pkt
);
1280 case RESPST_EXECUTE
:
1281 state
= execute(qp
, pkt
);
1283 case RESPST_COMPLETE
:
1284 state
= do_complete(qp
, pkt
);
1286 case RESPST_READ_REPLY
:
1287 state
= read_reply(qp
, pkt
);
1289 case RESPST_ACKNOWLEDGE
:
1290 state
= acknowledge(qp
, pkt
);
1292 case RESPST_CLEANUP
:
1293 state
= cleanup(qp
, pkt
);
1295 case RESPST_DUPLICATE_REQUEST
:
1296 state
= duplicate_request(qp
, pkt
);
1298 case RESPST_ERR_PSN_OUT_OF_SEQ
:
1299 /* RC only - Class B. Drop packet. */
1300 send_ack(qp
, pkt
, AETH_NAK_PSN_SEQ_ERROR
, qp
->resp
.psn
);
1301 state
= RESPST_CLEANUP
;
1304 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
:
1305 case RESPST_ERR_MISSING_OPCODE_FIRST
:
1306 case RESPST_ERR_MISSING_OPCODE_LAST_C
:
1307 case RESPST_ERR_UNSUPPORTED_OPCODE
:
1308 case RESPST_ERR_MISALIGNED_ATOMIC
:
1309 /* RC Only - Class C. */
1310 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1311 IB_WC_REM_INV_REQ_ERR
);
1312 state
= RESPST_COMPLETE
;
1315 case RESPST_ERR_MISSING_OPCODE_LAST_D1E
:
1316 state
= do_class_d1e_error(qp
);
1318 case RESPST_ERR_RNR
:
1319 if (qp_type(qp
) == IB_QPT_RC
) {
1320 rxe_counter_inc(rxe
, RXE_CNT_SND_RNR
);
1322 send_ack(qp
, pkt
, AETH_RNR_NAK
|
1324 qp
->attr
.min_rnr_timer
),
1327 /* UD/UC - class D */
1328 qp
->resp
.drop_msg
= 1;
1330 state
= RESPST_CLEANUP
;
1333 case RESPST_ERR_RKEY_VIOLATION
:
1334 if (qp_type(qp
) == IB_QPT_RC
) {
1336 do_class_ac_error(qp
, AETH_NAK_REM_ACC_ERR
,
1337 IB_WC_REM_ACCESS_ERR
);
1338 state
= RESPST_COMPLETE
;
1340 qp
->resp
.drop_msg
= 1;
1342 /* UC/SRQ Class D */
1343 qp
->resp
.status
= IB_WC_REM_ACCESS_ERR
;
1344 state
= RESPST_COMPLETE
;
1346 /* UC/non-SRQ Class E. */
1347 state
= RESPST_CLEANUP
;
1352 case RESPST_ERR_LENGTH
:
1353 if (qp_type(qp
) == IB_QPT_RC
) {
1355 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1356 IB_WC_REM_INV_REQ_ERR
);
1357 state
= RESPST_COMPLETE
;
1358 } else if (qp
->srq
) {
1359 /* UC/UD - class E */
1360 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1361 state
= RESPST_COMPLETE
;
1363 /* UC/UD - class D */
1364 qp
->resp
.drop_msg
= 1;
1365 state
= RESPST_CLEANUP
;
1369 case RESPST_ERR_MALFORMED_WQE
:
1371 do_class_ac_error(qp
, AETH_NAK_REM_OP_ERR
,
1372 IB_WC_LOC_QP_OP_ERR
);
1373 state
= RESPST_COMPLETE
;
1376 case RESPST_ERR_CQ_OVERFLOW
:
1378 state
= RESPST_ERROR
;
1382 if (qp
->resp
.goto_error
) {
1383 state
= RESPST_ERROR
;
1390 if (qp
->resp
.goto_error
) {
1391 state
= RESPST_ERROR
;
1398 rxe_drain_req_pkts(qp
, false);
1399 qp
->resp
.wqe
= NULL
;
1403 qp
->resp
.goto_error
= 0;
1404 pr_warn("qp#%d moved to error state\n", qp_num(qp
));