2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/skbuff.h>
38 #include "rxe_queue.h"
54 RESPST_DUPLICATE_REQUEST
,
55 RESPST_ERR_MALFORMED_WQE
,
56 RESPST_ERR_UNSUPPORTED_OPCODE
,
57 RESPST_ERR_MISALIGNED_ATOMIC
,
58 RESPST_ERR_PSN_OUT_OF_SEQ
,
59 RESPST_ERR_MISSING_OPCODE_FIRST
,
60 RESPST_ERR_MISSING_OPCODE_LAST_C
,
61 RESPST_ERR_MISSING_OPCODE_LAST_D1E
,
62 RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
,
64 RESPST_ERR_RKEY_VIOLATION
,
66 RESPST_ERR_CQ_OVERFLOW
,
/*
 * resp_state_name - printable names for the responder states.
 *
 * The table is indexed by the RESPST_* constants through designated
 * initializers; each string is the enumerator name without its "RESPST_"
 * prefix. rxe_responder() prints resp_state_name[state] in its pr_debug()
 * trace.
 */
73 static char *resp_state_name
[] = {
74 [RESPST_NONE
] = "NONE",
75 [RESPST_GET_REQ
] = "GET_REQ",
76 [RESPST_CHK_PSN
] = "CHK_PSN",
77 [RESPST_CHK_OP_SEQ
] = "CHK_OP_SEQ",
78 [RESPST_CHK_OP_VALID
] = "CHK_OP_VALID",
79 [RESPST_CHK_RESOURCE
] = "CHK_RESOURCE",
80 [RESPST_CHK_LENGTH
] = "CHK_LENGTH",
81 [RESPST_CHK_RKEY
] = "CHK_RKEY",
82 [RESPST_EXECUTE
] = "EXECUTE",
83 [RESPST_READ_REPLY
] = "READ_REPLY",
84 [RESPST_COMPLETE
] = "COMPLETE",
85 [RESPST_ACKNOWLEDGE
] = "ACKNOWLEDGE",
86 [RESPST_CLEANUP
] = "CLEANUP",
87 [RESPST_DUPLICATE_REQUEST
] = "DUPLICATE_REQUEST",
88 [RESPST_ERR_MALFORMED_WQE
] = "ERR_MALFORMED_WQE",
89 [RESPST_ERR_UNSUPPORTED_OPCODE
] = "ERR_UNSUPPORTED_OPCODE",
90 [RESPST_ERR_MISALIGNED_ATOMIC
] = "ERR_MISALIGNED_ATOMIC",
91 [RESPST_ERR_PSN_OUT_OF_SEQ
] = "ERR_PSN_OUT_OF_SEQ",
92 [RESPST_ERR_MISSING_OPCODE_FIRST
] = "ERR_MISSING_OPCODE_FIRST",
93 [RESPST_ERR_MISSING_OPCODE_LAST_C
] = "ERR_MISSING_OPCODE_LAST_C",
94 [RESPST_ERR_MISSING_OPCODE_LAST_D1E
] = "ERR_MISSING_OPCODE_LAST_D1E",
95 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
] = "ERR_TOO_MANY_RDMA_ATM_REQ",
96 [RESPST_ERR_RNR
] = "ERR_RNR",
97 [RESPST_ERR_RKEY_VIOLATION
] = "ERR_RKEY_VIOLATION",
98 [RESPST_ERR_LENGTH
] = "ERR_LENGTH",
99 [RESPST_ERR_CQ_OVERFLOW
] = "ERR_CQ_OVERFLOW",
100 [RESPST_ERROR
] = "ERROR",
101 [RESPST_RESET
] = "RESET",
102 [RESPST_DONE
] = "DONE",
103 [RESPST_EXIT
] = "EXIT",
106 /* rxe_recv calls here to add a request packet to the input queue */
/*
 * rxe_resp_queue_pkt - queue a request packet and kick the responder task.
 * @rxe: rxe device (not referenced by the visible statements)
 * @qp: queue pair whose req_pkts queue receives the packet
 *
 * The skb is appended to the tail of qp->req_pkts and qp->resp.task is run
 * through rxe_run_task(). must_sched is non-zero when the packet is an
 * IB_OPCODE_RC_RDMA_READ_REQUEST or when more than one packet is now queued.
 *
 * NOTE(review): presumably a non-zero must_sched makes rxe_run_task() defer
 * the work instead of running it inline - confirm against rxe_run_task().
 * NOTE(review): the remainder of the parameter list and the local
 * declarations are missing from this listing (embedded numbering has gaps).
 */
107 void rxe_resp_queue_pkt(struct rxe_dev
*rxe
, struct rxe_qp
*qp
,
111 struct rxe_pkt_info
*pkt
= SKB_TO_PKT(skb
);
113 skb_queue_tail(&qp
->req_pkts
, skb
);
115 must_sched
= (pkt
->opcode
== IB_OPCODE_RC_RDMA_READ_REQUEST
) ||
116 (skb_queue_len(&qp
->req_pkts
) > 1);
118 rxe_run_task(&qp
->resp
.task
, must_sched
);
/*
 * get_req - select the next request packet for the responder.
 * @qp: queue pair whose req_pkts queue is consulted
 * @pkt_p: out parameter, set to SKB_TO_PKT() of the skb peeked from req_pkts
 *
 * When qp->resp.state is QP_STATE_ERROR an skb is dequeued from req_pkts; the
 * visible returns on that path are RESPST_GET_REQ (still draining request
 * packets) and RESPST_CHK_RESOURCE (go drain the recv wr queue).
 * Otherwise the head of req_pkts is peeked (left on the queue) and the next
 * state is RESPST_READ_REPLY when qp->resp.res is set, else RESPST_CHK_PSN.
 *
 * NOTE(review): the tests on skb that guard these returns, and whatever is
 * returned for an empty queue, are on lines missing from this listing.
 */
121 static inline enum resp_states
get_req(struct rxe_qp
*qp
,
122 struct rxe_pkt_info
**pkt_p
)
126 if (qp
->resp
.state
== QP_STATE_ERROR
) {
127 skb
= skb_dequeue(&qp
->req_pkts
);
129 /* drain request packet queue */
132 return RESPST_GET_REQ
;
135 /* go drain recv wr queue */
136 return RESPST_CHK_RESOURCE
;
139 skb
= skb_peek(&qp
->req_pkts
);
143 *pkt_p
= SKB_TO_PKT(skb
);
145 return (qp
->resp
.res
) ? RESPST_READ_REPLY
: RESPST_CHK_PSN
;
/*
 * check_psn - compare the packet PSN with the expected PSN.
 * @qp: queue pair; qp->resp.psn is the PSN compared against
 * @pkt: request packet being examined
 *
 * diff is psn_compare(pkt->psn, qp->resp.psn). Visible outcomes:
 *  - first branch: RESPST_CLEANUP if sent_psn_nak is already set; otherwise
 *    sent_psn_nak is set, RXE_CNT_OUT_OF_SEQ_REQ is counted and
 *    RESPST_ERR_PSN_OUT_OF_SEQ is returned. diff < 0 counts RXE_CNT_DUP_REQ
 *    and returns RESPST_DUPLICATE_REQUEST. Past those returns sent_psn_nak
 *    is cleared.
 *  - second branch: when drop_msg is set or diff != 0, a packet carrying
 *    RXE_START_MASK clears drop_msg and continues with RESPST_CHK_OP_SEQ;
 *    any other packet sets drop_msg and returns RESPST_CLEANUP.
 *  - otherwise RESPST_CHK_OP_SEQ.
 *
 * NOTE(review): the case labels of the switch on qp_type(qp) and the first
 * condition (presumably diff > 0) are missing from this listing - confirm.
 */
148 static enum resp_states
check_psn(struct rxe_qp
*qp
,
149 struct rxe_pkt_info
*pkt
)
151 int diff
= psn_compare(pkt
->psn
, qp
->resp
.psn
);
152 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
154 switch (qp_type(qp
)) {
157 if (qp
->resp
.sent_psn_nak
)
158 return RESPST_CLEANUP
;
160 qp
->resp
.sent_psn_nak
= 1;
161 rxe_counter_inc(rxe
, RXE_CNT_OUT_OF_SEQ_REQ
);
162 return RESPST_ERR_PSN_OUT_OF_SEQ
;
164 } else if (diff
< 0) {
165 rxe_counter_inc(rxe
, RXE_CNT_DUP_REQ
);
166 return RESPST_DUPLICATE_REQUEST
;
169 if (qp
->resp
.sent_psn_nak
)
170 qp
->resp
.sent_psn_nak
= 0;
175 if (qp
->resp
.drop_msg
|| diff
!= 0) {
176 if (pkt
->mask
& RXE_START_MASK
) {
177 qp
->resp
.drop_msg
= 0;
178 return RESPST_CHK_OP_SEQ
;
181 qp
->resp
.drop_msg
= 1;
182 return RESPST_CLEANUP
;
189 return RESPST_CHK_OP_SEQ
;
/*
 * check_op_seq - check that pkt->opcode may follow the previous opcode.
 * @qp: queue pair; qp->resp.opcode holds the previously accepted opcode
 * @pkt: request packet being examined
 *
 * RC opcodes: after SEND_FIRST/SEND_MIDDLE only SEND_MIDDLE or one of the
 * SEND_LAST variants is accepted, and after RDMA_WRITE_FIRST/MIDDLE only
 * RDMA_WRITE_MIDDLE or an RDMA_WRITE_LAST variant; the other return in each
 * group is RESPST_ERR_MISSING_OPCODE_LAST_C. In the remaining RC group a
 * MIDDLE/LAST opcode yields RESPST_ERR_MISSING_OPCODE_FIRST.
 *
 * UC opcodes: same shape, but the error is
 * RESPST_ERR_MISSING_OPCODE_LAST_D1E, and in the remaining group a
 * MIDDLE/LAST opcode sets qp->resp.drop_msg and returns RESPST_CLEANUP.
 *
 * Every accepting path returns RESPST_CHK_OP_VALID.
 *
 * NOTE(review): the QP-type case labels and the default: labels are missing
 * from this listing; the grouping above is inferred from the opcode prefixes
 * (IB_OPCODE_RC_ / IB_OPCODE_UC_) - confirm.
 */
192 static enum resp_states
check_op_seq(struct rxe_qp
*qp
,
193 struct rxe_pkt_info
*pkt
)
195 switch (qp_type(qp
)) {
197 switch (qp
->resp
.opcode
) {
198 case IB_OPCODE_RC_SEND_FIRST
:
199 case IB_OPCODE_RC_SEND_MIDDLE
:
200 switch (pkt
->opcode
) {
201 case IB_OPCODE_RC_SEND_MIDDLE
:
202 case IB_OPCODE_RC_SEND_LAST
:
203 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
204 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
205 return RESPST_CHK_OP_VALID
;
207 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
210 case IB_OPCODE_RC_RDMA_WRITE_FIRST
:
211 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
212 switch (pkt
->opcode
) {
213 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
214 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
215 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
216 return RESPST_CHK_OP_VALID
;
218 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
222 switch (pkt
->opcode
) {
223 case IB_OPCODE_RC_SEND_MIDDLE
:
224 case IB_OPCODE_RC_SEND_LAST
:
225 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
226 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
227 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
228 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
229 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
230 return RESPST_ERR_MISSING_OPCODE_FIRST
;
232 return RESPST_CHK_OP_VALID
;
238 switch (qp
->resp
.opcode
) {
239 case IB_OPCODE_UC_SEND_FIRST
:
240 case IB_OPCODE_UC_SEND_MIDDLE
:
241 switch (pkt
->opcode
) {
242 case IB_OPCODE_UC_SEND_MIDDLE
:
243 case IB_OPCODE_UC_SEND_LAST
:
244 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
245 return RESPST_CHK_OP_VALID
;
247 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
250 case IB_OPCODE_UC_RDMA_WRITE_FIRST
:
251 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
252 switch (pkt
->opcode
) {
253 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
254 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
255 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
256 return RESPST_CHK_OP_VALID
;
258 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
262 switch (pkt
->opcode
) {
263 case IB_OPCODE_UC_SEND_MIDDLE
:
264 case IB_OPCODE_UC_SEND_LAST
:
265 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
266 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
267 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
268 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
269 qp
->resp
.drop_msg
= 1;
270 return RESPST_CLEANUP
;
272 return RESPST_CHK_OP_VALID
;
278 return RESPST_CHK_OP_VALID
;
/*
 * check_op_valid - check the operation against the QP access flags.
 * @qp: queue pair; qp->attr.qp_access_flags is consulted
 * @pkt: request packet; pkt->mask classifies the operation
 *
 * First visible test: a read, write or atomic packet (RXE_READ_MASK,
 * RXE_WRITE_MASK, RXE_ATOMIC_MASK) whose matching IB_ACCESS_REMOTE_READ /
 * _WRITE / _ATOMIC flag is not set yields RESPST_ERR_UNSUPPORTED_OPCODE.
 * Second visible test: a write packet without IB_ACCESS_REMOTE_WRITE sets
 * qp->resp.drop_msg and returns RESPST_CLEANUP.
 * Otherwise the next state is RESPST_CHK_RESOURCE.
 *
 * NOTE(review): the case labels of the switch on qp_type(qp) that select
 * between the two tests are missing from this listing.
 */
282 static enum resp_states
check_op_valid(struct rxe_qp
*qp
,
283 struct rxe_pkt_info
*pkt
)
285 switch (qp_type(qp
)) {
287 if (((pkt
->mask
& RXE_READ_MASK
) &&
288 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_READ
)) ||
289 ((pkt
->mask
& RXE_WRITE_MASK
) &&
290 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) ||
291 ((pkt
->mask
& RXE_ATOMIC_MASK
) &&
292 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_ATOMIC
))) {
293 return RESPST_ERR_UNSUPPORTED_OPCODE
;
299 if ((pkt
->mask
& RXE_WRITE_MASK
) &&
300 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) {
301 qp
->resp
.drop_msg
= 1;
302 return RESPST_CLEANUP
;
317 return RESPST_CHK_RESOURCE
;
/*
 * get_srq_wqe - take a receive WQE from the shared receive queue.
 * @qp: queue pair; qp->srq is the shared receive queue used
 *
 * Works on srq->rq.queue with srq->rq.consumer_lock held (spin_lock_bh).
 * The WQE is copied into qp->resp.srq_wqe and qp->resp.wqe is pointed at
 * the wqe member of that copy, so the responder works on a private copy.
 * Two visible paths return RESPST_ERR_RNR (one before the lock is taken,
 * one after unlocking); success returns RESPST_CHK_LENGTH.
 *
 * If srq->limit is set, an event handler is registered and
 * queue_count(q) < srq->limit, the visible tail unlocks and then calls
 * srq->ibsrq.event_handler() with IB_EVENT_SRQ_LIMIT_REACHED.
 *
 * NOTE(review): how wqe is fetched, the conditions in front of the RNR
 * returns, and the jump from the limit test to the event code are on lines
 * missing from this listing.
 */
320 static enum resp_states
get_srq_wqe(struct rxe_qp
*qp
)
322 struct rxe_srq
*srq
= qp
->srq
;
323 struct rxe_queue
*q
= srq
->rq
.queue
;
324 struct rxe_recv_wqe
*wqe
;
328 return RESPST_ERR_RNR
;
330 spin_lock_bh(&srq
->rq
.consumer_lock
);
334 spin_unlock_bh(&srq
->rq
.consumer_lock
);
335 return RESPST_ERR_RNR
;
338 /* note kernel and user space recv wqes have same size */
339 memcpy(&qp
->resp
.srq_wqe
, wqe
, sizeof(qp
->resp
.srq_wqe
));
341 qp
->resp
.wqe
= &qp
->resp
.srq_wqe
.wqe
;
344 if (srq
->limit
&& srq
->ibsrq
.event_handler
&&
345 (queue_count(q
) < srq
->limit
)) {
350 spin_unlock_bh(&srq
->rq
.consumer_lock
);
351 return RESPST_CHK_LENGTH
;
354 spin_unlock_bh(&srq
->rq
.consumer_lock
);
355 ev
.device
= qp
->ibqp
.device
;
356 ev
.element
.srq
= qp
->ibqp
.srq
;
357 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
358 srq
->ibsrq
.event_handler(&ev
, srq
->ibsrq
.srq_context
);
359 return RESPST_CHK_LENGTH
;
/*
 * check_resource - make sure the resources the request needs are available.
 * @qp: queue pair
 * @pkt: request packet (pkt->mask classifies the operation)
 *
 * QP in QP_STATE_ERROR: qp->resp.status is set to IB_WC_WR_FLUSH_ERR and
 * RESPST_COMPLETE is returned, on two visible paths (the second after
 * qp->resp.wqe has been reloaded from the head of qp->rq.queue).
 *
 * RXE_READ_OR_ATOMIC packets: RESPST_CHK_LENGTH when
 * qp->attr.max_dest_rd_atomic > 0, else RESPST_ERR_TOO_MANY_RDMA_ATM_REQ.
 *
 * RXE_RWR_MASK packets: either get_srq_wqe(qp) decides, or qp->resp.wqe is
 * taken from the head of qp->rq.queue, giving RESPST_CHK_LENGTH when a WQE
 * is present and RESPST_ERR_RNR when not.
 *
 * Anything else: RESPST_CHK_LENGTH.
 *
 * NOTE(review): the tests on srq that choose between the paired paths are
 * missing from this listing (presumably "if (srq)") - confirm.
 */
362 static enum resp_states
check_resource(struct rxe_qp
*qp
,
363 struct rxe_pkt_info
*pkt
)
365 struct rxe_srq
*srq
= qp
->srq
;
367 if (qp
->resp
.state
== QP_STATE_ERROR
) {
369 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
370 return RESPST_COMPLETE
;
372 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
374 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
375 return RESPST_COMPLETE
;
384 if (pkt
->mask
& RXE_READ_OR_ATOMIC
) {
385 /* it is the requesters job to not send
386 * too many read/atomic ops, we just
387 * recycle the responder resource queue
389 if (likely(qp
->attr
.max_dest_rd_atomic
> 0))
390 return RESPST_CHK_LENGTH
;
392 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
;
395 if (pkt
->mask
& RXE_RWR_MASK
) {
397 return get_srq_wqe(qp
);
399 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
400 return (qp
->resp
.wqe
) ? RESPST_CHK_LENGTH
: RESPST_ERR_RNR
;
403 return RESPST_CHK_LENGTH
;
/*
 * check_length - length-check stage of the responder.
 * @qp: queue pair (only qp_type(qp) is read)
 * @pkt: request packet (not referenced by the visible statements)
 *
 * Every visible path returns RESPST_CHK_RKEY, whatever qp_type(qp) is; no
 * length validation is visible in this function.
 */
406 static enum resp_states
check_length(struct rxe_qp
*qp
,
407 struct rxe_pkt_info
*pkt
)
409 switch (qp_type(qp
)) {
411 return RESPST_CHK_RKEY
;
414 return RESPST_CHK_RKEY
;
417 return RESPST_CHK_RKEY
;
/*
 * check_rkey - validate the remote key and address range of a request.
 * @qp: queue pair; qp->resp.va/rkey/resid are loaded from the packet here
 * @pkt: request packet
 *
 * Read/write packets with a RETH load va, rkey and resid from the RETH and
 * need IB_ACCESS_REMOTE_READ or IB_ACCESS_REMOTE_WRITE; atomic packets load
 * them from the ATMETH (resid = sizeof(u64)) and need
 * IB_ACCESS_REMOTE_ATOMIC; any other packet goes straight to
 * RESPST_EXECUTE, as does a zero-length RETH operation.
 *
 * The memory object is then looked up with lookup_mem(qp->pd, access, rkey,
 * lookup_remote). Visible failure states: RESPST_ERR_RKEY_VIOLATION (around
 * the lookup, for a memory object in RXE_MEM_STATE_FREE, and around
 * mem_check_range()) and, for write packets, RESPST_ERR_LENGTH when the
 * payload size or the BTH pad count does not fit mtu/resid. On success the
 * function warns once if qp->resp.mr is already set and returns
 * RESPST_EXECUTE.
 *
 * NOTE(review): several locals (va, access, rkey, resid, pktlen, mtu), the
 * error exit that returns "state", and the store into qp->resp.mr are on
 * lines missing from this listing.
 */
421 static enum resp_states
check_rkey(struct rxe_qp
*qp
,
422 struct rxe_pkt_info
*pkt
)
424 struct rxe_mem
*mem
= NULL
;
430 enum resp_states state
;
433 if (pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_MASK
)) {
434 if (pkt
->mask
& RXE_RETH_MASK
) {
435 qp
->resp
.va
= reth_va(pkt
);
436 qp
->resp
.rkey
= reth_rkey(pkt
);
437 qp
->resp
.resid
= reth_len(pkt
);
439 access
= (pkt
->mask
& RXE_READ_MASK
) ? IB_ACCESS_REMOTE_READ
440 : IB_ACCESS_REMOTE_WRITE
;
441 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
442 qp
->resp
.va
= atmeth_va(pkt
);
443 qp
->resp
.rkey
= atmeth_rkey(pkt
);
444 qp
->resp
.resid
= sizeof(u64
);
445 access
= IB_ACCESS_REMOTE_ATOMIC
;
447 return RESPST_EXECUTE
;
450 /* A zero-byte op is not required to set an addr or rkey. */
451 if ((pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_OR_SEND
)) &&
452 (pkt
->mask
& RXE_RETH_MASK
) &&
453 reth_len(pkt
) == 0) {
454 return RESPST_EXECUTE
;
458 rkey
= qp
->resp
.rkey
;
459 resid
= qp
->resp
.resid
;
460 pktlen
= payload_size(pkt
);
462 mem
= lookup_mem(qp
->pd
, access
, rkey
, lookup_remote
);
464 state
= RESPST_ERR_RKEY_VIOLATION
;
468 if (unlikely(mem
->state
== RXE_MEM_STATE_FREE
)) {
469 state
= RESPST_ERR_RKEY_VIOLATION
;
473 if (mem_check_range(mem
, va
, resid
)) {
474 state
= RESPST_ERR_RKEY_VIOLATION
;
478 if (pkt
->mask
& RXE_WRITE_MASK
) {
480 if (pktlen
!= mtu
|| bth_pad(pkt
)) {
481 state
= RESPST_ERR_LENGTH
;
485 if (pktlen
!= resid
) {
486 state
= RESPST_ERR_LENGTH
;
489 if ((bth_pad(pkt
) != (0x3 & (-resid
)))) {
490 /* This case may not be exactly that
491 * but nothing else fits.
493 state
= RESPST_ERR_LENGTH
;
499 WARN_ON_ONCE(qp
->resp
.mr
);
502 return RESPST_EXECUTE
;
/*
 * send_data_in - copy send data into the current receive WQE.
 * @qp: queue pair; qp->resp.wqe->dma describes the destination
 * @data_addr: source address of the data
 *
 * Calls copy_data() with IB_ACCESS_LOCAL_WRITE and direction to_mem_obj to
 * place data_len bytes from data_addr into the WQE's dma description.
 * On the visible error return, -ENOSPC maps to RESPST_ERR_LENGTH and any
 * other error to RESPST_ERR_MALFORMED_WQE.
 *
 * NOTE(review): the data_len parameter declaration, the test on err and the
 * success return are on lines missing from this listing.
 */
510 static enum resp_states
send_data_in(struct rxe_qp
*qp
, void *data_addr
,
515 err
= copy_data(qp
->pd
, IB_ACCESS_LOCAL_WRITE
, &qp
->resp
.wqe
->dma
,
516 data_addr
, data_len
, to_mem_obj
, NULL
);
518 return (err
== -ENOSPC
) ? RESPST_ERR_LENGTH
519 : RESPST_ERR_MALFORMED_WQE
;
/*
 * write_data_in - copy an RDMA write payload into the target memory region.
 * @qp: queue pair; qp->resp.mr and qp->resp.va locate the destination
 * @pkt: request packet whose payload is written
 *
 * Copies payload_size(pkt) bytes from payload_addr(pkt) into qp->resp.mr at
 * qp->resp.va with rxe_mem_copy() (direction to_mem_obj, no CRC pointer).
 * rc starts as RESPST_NONE and is set to RESPST_ERR_RKEY_VIOLATION on the
 * visible error assignment. qp->resp.va is advanced and qp->resp.resid
 * reduced by the number of bytes written.
 *
 * NOTE(review): the test on err and the final return are on lines missing
 * from this listing; presumably rc is returned - confirm.
 */
524 static enum resp_states
write_data_in(struct rxe_qp
*qp
,
525 struct rxe_pkt_info
*pkt
)
527 enum resp_states rc
= RESPST_NONE
;
529 int data_len
= payload_size(pkt
);
531 err
= rxe_mem_copy(qp
->resp
.mr
, qp
->resp
.va
, payload_addr(pkt
),
532 data_len
, to_mem_obj
, NULL
);
534 rc
= RESPST_ERR_RKEY_VIOLATION
;
538 qp
->resp
.va
+= data_len
;
539 qp
->resp
.resid
-= data_len
;
545 /* Guarantee atomicity of atomic operations at the machine level. */
546 static DEFINE_SPINLOCK(atomic_ops_lock
);
/*
 * process_atomic - execute an atomic request on the target memory.
 * @qp: queue pair; qp->resp.mr is the target memory region
 * @pkt: atomic request packet (ATMETH supplies va, compare and swap/add)
 *
 * The region must be in RXE_MEM_STATE_VALID, otherwise ret is
 * RESPST_ERR_RKEY_VIOLATION. The target address comes from
 * iova_to_vaddr(mr, iova, sizeof(u64)); a NULL or non 8-byte aligned address
 * gives RESPST_ERR_MISALIGNED_ATOMIC.
 *
 * With atomic_ops_lock held (spin_lock_bh) the old value is saved in
 * qp->resp.atomic_orig. For the RC/RD COMPARE_SWAP opcodes the word is
 * replaced by atmeth_swap_add(pkt) only if it equals atmeth_comp(pkt); the
 * other visible update adds atmeth_swap_add(pkt) to the word.
 *
 * NOTE(review): the vaddr declaration, the "else" in front of the add, the
 * success value of ret and the final return are on lines missing from this
 * listing.
 */
548 static enum resp_states
process_atomic(struct rxe_qp
*qp
,
549 struct rxe_pkt_info
*pkt
)
551 u64 iova
= atmeth_va(pkt
);
553 enum resp_states ret
;
554 struct rxe_mem
*mr
= qp
->resp
.mr
;
556 if (mr
->state
!= RXE_MEM_STATE_VALID
) {
557 ret
= RESPST_ERR_RKEY_VIOLATION
;
561 vaddr
= iova_to_vaddr(mr
, iova
, sizeof(u64
));
563 /* check vaddr is 8 bytes aligned. */
564 if (!vaddr
|| (uintptr_t)vaddr
& 7) {
565 ret
= RESPST_ERR_MISALIGNED_ATOMIC
;
569 spin_lock_bh(&atomic_ops_lock
);
571 qp
->resp
.atomic_orig
= *vaddr
;
573 if (pkt
->opcode
== IB_OPCODE_RC_COMPARE_SWAP
||
574 pkt
->opcode
== IB_OPCODE_RD_COMPARE_SWAP
) {
575 if (*vaddr
== atmeth_comp(pkt
))
576 *vaddr
= atmeth_swap_add(pkt
);
578 *vaddr
+= atmeth_swap_add(pkt
);
581 spin_unlock_bh(&atomic_ops_lock
);
/*
 * prepare_ack_packet - build the skb for an acknowledge/response packet.
 * @qp: queue pair sending the response
 * @pkt: request packet being answered; its headers seed the response
 * @ack: packet info that is filled in for the response
 *
 * Callers in this file additionally pass, in order: opcode, payload length,
 * psn, AETH syndrome and a pointer that receives the CRC (send_ack() passes
 * NULL for the last one).
 *
 * pad rounds the payload up to a multiple of four bytes; paylen is the
 * header length for the opcode (rxe_opcode[opcode].length) plus payload,
 * pad and RXE_ICRC_SIZE. The skb comes from rxe_init_packet() using
 * qp->pri_av. The request headers up to and including the BTH are copied
 * into the response, then opcode, destination QPN (qp->attr.dest_qp_num),
 * pad and PSN are overwritten. An AETH (syndrome + qp->resp.msn) and the
 * atomic original value are added when the opcode mask asks for them, and
 * rxe_prepare() finishes the headers.
 *
 * NOTE(review): the tail of the parameter list, the locals, all error
 * handling, the use of "p" and the return are on lines missing from this
 * listing; presumably the skb (or NULL on failure) is returned - confirm.
 */
588 static struct sk_buff
*prepare_ack_packet(struct rxe_qp
*qp
,
589 struct rxe_pkt_info
*pkt
,
590 struct rxe_pkt_info
*ack
,
597 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
608 pad
= (-payload
) & 0x3;
609 paylen
= rxe_opcode
[opcode
].length
+ payload
+ pad
+ RXE_ICRC_SIZE
;
611 skb
= rxe_init_packet(rxe
, &qp
->pri_av
, paylen
, ack
);
616 ack
->opcode
= opcode
;
617 ack
->mask
= rxe_opcode
[opcode
].mask
;
618 ack
->offset
= pkt
->offset
;
619 ack
->paylen
= paylen
;
621 /* fill in bth using the request packet headers */
622 memcpy(ack
->hdr
, pkt
->hdr
, pkt
->offset
+ RXE_BTH_BYTES
);
624 bth_set_opcode(ack
, opcode
);
625 bth_set_qpn(ack
, qp
->attr
.dest_qp_num
);
626 bth_set_pad(ack
, pad
);
628 bth_set_psn(ack
, psn
);
632 if (ack
->mask
& RXE_AETH_MASK
) {
633 aeth_set_syn(ack
, syndrome
);
634 aeth_set_msn(ack
, qp
->resp
.msn
);
637 if (ack
->mask
& RXE_ATMACK_MASK
)
638 atmack_set_orig(ack
, qp
->resp
.atomic_orig
);
640 err
= rxe_prepare(rxe
, ack
, skb
, &crc
);
647 /* CRC computation will be continued by the caller */
650 p
= payload_addr(ack
) + payload
+ bth_pad(ack
);
657 /* RDMA read response. If res is not NULL, then we have a current RDMA request
658 * being processed or replayed.
/*
 * read_reply - send one RDMA read response packet.
 * @qp: queue pair; qp->resp.res is the read resource in progress, if any
 * @req_pkt: the RDMA read request being answered
 *
 * First-time setup (visible assignments): the resource slot at
 * qp->resp.resources[qp->resp.res_head] is recycled
 * (free_rd_atomic_resource + rxe_advance_resp_resource), typed as
 * RXE_READ_MASK, and loaded with va, va_org, resid, length, rkey and mr from
 * qp->resp. first_psn/cur_psn are the request PSN; last_psn is
 * first_psn + ceil(reth_len / mtu) - 1 masked with BTH_PSN_MASK, or
 * first_psn for a zero-length read. The state becomes rdatm_res_state_new.
 *
 * Each call picks the response opcode from the resource state and the
 * remaining length (ONLY/FIRST for a new resource, MIDDLE/LAST otherwise),
 * builds a packet of min(resid, mtu) payload bytes with
 * prepare_ack_packet(), copies the data out of res->read.mr with
 * rxe_mem_copy() (from_mem_obj, continuing the CRC in icrc) and transmits it
 * with rxe_xmit_packet(). Both visible failure returns are RESPST_ERR_RNR.
 * Afterwards va, resid and cur_psn are advanced. On the visible completion
 * path qp->resp.opcode is set to -1, qp->resp.psn is moved up to cur_psn if
 * it is not already past it, and the state is RESPST_CLEANUP.
 *
 * NOTE(review): the tests on res, skb and err, the value used while resid
 * remains, the ICRC store through "p" and the final return are on lines
 * missing from this listing.
 */
660 static enum resp_states
read_reply(struct rxe_qp
*qp
,
661 struct rxe_pkt_info
*req_pkt
)
663 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
664 struct rxe_pkt_info ack_pkt
;
667 enum resp_states state
;
671 struct resp_res
*res
= qp
->resp
.res
;
676 /* This is the first time we process that request. Get a
679 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
681 free_rd_atomic_resource(qp
, res
);
682 rxe_advance_resp_resource(qp
);
684 res
->type
= RXE_READ_MASK
;
687 res
->read
.va
= qp
->resp
.va
;
688 res
->read
.va_org
= qp
->resp
.va
;
690 res
->first_psn
= req_pkt
->psn
;
692 if (reth_len(req_pkt
)) {
693 res
->last_psn
= (req_pkt
->psn
+
694 (reth_len(req_pkt
) + mtu
- 1) /
695 mtu
- 1) & BTH_PSN_MASK
;
697 res
->last_psn
= res
->first_psn
;
699 res
->cur_psn
= req_pkt
->psn
;
701 res
->read
.resid
= qp
->resp
.resid
;
702 res
->read
.length
= qp
->resp
.resid
;
703 res
->read
.rkey
= qp
->resp
.rkey
;
705 /* note res inherits the reference to mr from qp */
706 res
->read
.mr
= qp
->resp
.mr
;
710 res
->state
= rdatm_res_state_new
;
713 if (res
->state
== rdatm_res_state_new
) {
714 if (res
->read
.resid
<= mtu
)
715 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY
;
717 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
;
719 if (res
->read
.resid
> mtu
)
720 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
;
722 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
;
725 res
->state
= rdatm_res_state_next
;
727 payload
= min_t(int, res
->read
.resid
, mtu
);
729 skb
= prepare_ack_packet(qp
, req_pkt
, &ack_pkt
, opcode
, payload
,
730 res
->cur_psn
, AETH_ACK_UNLIMITED
, &icrc
);
732 return RESPST_ERR_RNR
;
734 err
= rxe_mem_copy(res
->read
.mr
, res
->read
.va
, payload_addr(&ack_pkt
),
735 payload
, from_mem_obj
, &icrc
);
737 pr_err("Failed copying memory\n");
739 p
= payload_addr(&ack_pkt
) + payload
+ bth_pad(&ack_pkt
);
742 err
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb
);
744 pr_err("Failed sending RDMA reply.\n");
745 return RESPST_ERR_RNR
;
748 res
->read
.va
+= payload
;
749 res
->read
.resid
-= payload
;
750 res
->cur_psn
= (res
->cur_psn
+ 1) & BTH_PSN_MASK
;
752 if (res
->read
.resid
> 0) {
757 qp
->resp
.opcode
= -1;
758 if (psn_compare(res
->cur_psn
, qp
->resp
.psn
) >= 0)
759 qp
->resp
.psn
= res
->cur_psn
;
760 state
= RESPST_CLEANUP
;
/*
 * build_rdma_network_hdr - capture the packet's IP header for the receiver.
 * @hdr: output; zeroed first, then filled from the skb behind @pkt
 * @pkt: received packet
 *
 * For an IPv4 skb (skb->protocol == htons(ETH_P_IP)) the IP header is copied
 * into hdr->roce4grh; for an IPv6 skb the IPv6 header is copied into
 * hdr->ibgrh. Any other protocol leaves *hdr all zero.
 */
766 static void build_rdma_network_hdr(union rdma_network_hdr
*hdr
,
767 struct rxe_pkt_info
*pkt
)
769 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
771 memset(hdr
, 0, sizeof(*hdr
));
772 if (skb
->protocol
== htons(ETH_P_IP
))
773 memcpy(&hdr
->roce4grh
, ip_hdr(skb
), sizeof(hdr
->roce4grh
));
774 else if (skb
->protocol
== htons(ETH_P_IPV6
))
775 memcpy(&hdr
->ibgrh
, ipv6_hdr(skb
), sizeof(hdr
->ibgrh
));
778 /* Executes a new request. A retried request never reaches this function (sends
779 * and writes are discarded, and reads and atomics are retried elsewhere).
/*
 * execute - perform the operation carried by a validated request packet.
 * @qp: queue pair
 * @pkt: request packet; pkt->mask selects the operation
 *
 * Send: for UD, SMI and GSI QPs a union rdma_network_hdr built from the
 * packet is delivered through send_data_in() first; the packet payload is
 * delivered through send_data_in() as well.
 * Write: write_data_in(). Read: returns RESPST_READ_REPLY directly.
 * Atomic: process_atomic().
 *
 * Afterwards the expected PSN becomes (pkt->psn + 1) & BTH_PSN_MASK and is
 * mirrored into ack_psn, the opcode is recorded and the status is set to
 * IB_WC_SUCCESS. Next state: RESPST_COMPLETE for packets with RXE_COMP_MASK,
 * RESPST_ACKNOWLEDGE for other packets on an RC QP, otherwise
 * RESPST_CLEANUP.
 *
 * NOTE(review): the tests on err after each helper and the msn updates that
 * the comments mention are on lines missing from this listing.
 */
781 static enum resp_states
execute(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
)
783 enum resp_states err
;
785 if (pkt
->mask
& RXE_SEND_MASK
) {
786 if (qp_type(qp
) == IB_QPT_UD
||
787 qp_type(qp
) == IB_QPT_SMI
||
788 qp_type(qp
) == IB_QPT_GSI
) {
789 union rdma_network_hdr hdr
;
791 build_rdma_network_hdr(&hdr
, pkt
);
793 err
= send_data_in(qp
, &hdr
, sizeof(hdr
));
797 err
= send_data_in(qp
, payload_addr(pkt
), payload_size(pkt
));
800 } else if (pkt
->mask
& RXE_WRITE_MASK
) {
801 err
= write_data_in(qp
, pkt
);
804 } else if (pkt
->mask
& RXE_READ_MASK
) {
805 /* For RDMA Read we can increment the msn now. See C9-148. */
807 return RESPST_READ_REPLY
;
808 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
809 err
= process_atomic(qp
, pkt
);
817 /* next expected psn, read handles this separately */
818 qp
->resp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
819 qp
->resp
.ack_psn
= qp
->resp
.psn
;
821 qp
->resp
.opcode
= pkt
->opcode
;
822 qp
->resp
.status
= IB_WC_SUCCESS
;
824 if (pkt
->mask
& RXE_COMP_MASK
) {
825 /* We successfully processed this new request. */
827 return RESPST_COMPLETE
;
828 } else if (qp_type(qp
) == IB_QPT_RC
)
829 return RESPST_ACKNOWLEDGE
;
831 return RESPST_CLEANUP
;
/*
 * do_complete - build and post the receive completion for the current WQE.
 * @qp: queue pair; qp->resp.wqe is the receive WQE being completed
 * @pkt: request packet, or NULL (the rxe_cq_post() call tests for it)
 *
 * The completion is built in "cqe", viewed either as a kernel struct ib_wc
 * (wc) or as a user struct ib_uverbs_wc (uwc) depending on
 * qp->rcq->is_user. Status, qp_num and wr_id are always filled in.
 *
 * On IB_WC_SUCCESS: the opcode is IB_WC_RECV_RDMA_WITH_IMM when the packet
 * has both RXE_IMMDT_MASK and RXE_WRITE_MASK, else IB_WC_RECV; byte_len is
 * dma.length - dma.resid. The user form sets IB_WC_GRH plus
 * immediate/invalidate data, qp_num, src_qp (with a DETH) and port_num. The
 * kernel form additionally reports the network header type (IPv4/IPv6) and
 * the VLAN id when the receiving device is a VLAN device; for a packet with
 * an IETH it looks up the MR with index (invalidate_rkey >> 8) in
 * rxe->mr_pool, logs "Bad rkey" when that fails, and otherwise marks the MR
 * RXE_MEM_STATE_FREE.
 *
 * The receive queue consumer is then advanced and the completion posted
 * with rxe_cq_post(); a failure there gives RESPST_ERR_CQ_OVERFLOW. Next
 * state: RESPST_CHK_RESOURCE while the QP is in QP_STATE_ERROR,
 * RESPST_ACKNOWLEDGE on an RC QP, else RESPST_CLEANUP. An early
 * RESPST_CLEANUP return is visible near the top.
 *
 * NOTE(review): the declaration of cqe and rmr, the condition of the early
 * return, the srq test in front of advance_consumer() and several else/brace
 * lines are missing from this listing.
 */
834 static enum resp_states
do_complete(struct rxe_qp
*qp
,
835 struct rxe_pkt_info
*pkt
)
838 struct ib_wc
*wc
= &cqe
.ibwc
;
839 struct ib_uverbs_wc
*uwc
= &cqe
.uibwc
;
840 struct rxe_recv_wqe
*wqe
= qp
->resp
.wqe
;
843 return RESPST_CLEANUP
;
845 memset(&cqe
, 0, sizeof(cqe
));
847 if (qp
->rcq
->is_user
) {
848 uwc
->status
= qp
->resp
.status
;
849 uwc
->qp_num
= qp
->ibqp
.qp_num
;
850 uwc
->wr_id
= wqe
->wr_id
;
852 wc
->status
= qp
->resp
.status
;
854 wc
->wr_id
= wqe
->wr_id
;
857 if (wc
->status
== IB_WC_SUCCESS
) {
858 wc
->opcode
= (pkt
->mask
& RXE_IMMDT_MASK
&&
859 pkt
->mask
& RXE_WRITE_MASK
) ?
860 IB_WC_RECV_RDMA_WITH_IMM
: IB_WC_RECV
;
862 wc
->byte_len
= wqe
->dma
.length
- wqe
->dma
.resid
;
864 /* fields after byte_len are different between kernel and user
867 if (qp
->rcq
->is_user
) {
868 uwc
->wc_flags
= IB_WC_GRH
;
870 if (pkt
->mask
& RXE_IMMDT_MASK
) {
871 uwc
->wc_flags
|= IB_WC_WITH_IMM
;
872 uwc
->ex
.imm_data
= immdt_imm(pkt
);
875 if (pkt
->mask
& RXE_IETH_MASK
) {
876 uwc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
877 uwc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
880 uwc
->qp_num
= qp
->ibqp
.qp_num
;
882 if (pkt
->mask
& RXE_DETH_MASK
)
883 uwc
->src_qp
= deth_sqp(pkt
);
885 uwc
->port_num
= qp
->attr
.port_num
;
887 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
889 wc
->wc_flags
= IB_WC_GRH
| IB_WC_WITH_NETWORK_HDR_TYPE
;
890 if (skb
->protocol
== htons(ETH_P_IP
))
891 wc
->network_hdr_type
= RDMA_NETWORK_IPV4
;
893 wc
->network_hdr_type
= RDMA_NETWORK_IPV6
;
895 if (is_vlan_dev(skb
->dev
)) {
896 wc
->wc_flags
|= IB_WC_WITH_VLAN
;
897 wc
->vlan_id
= vlan_dev_vlan_id(skb
->dev
);
900 if (pkt
->mask
& RXE_IMMDT_MASK
) {
901 wc
->wc_flags
|= IB_WC_WITH_IMM
;
902 wc
->ex
.imm_data
= immdt_imm(pkt
);
905 if (pkt
->mask
& RXE_IETH_MASK
) {
906 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
909 wc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
910 wc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
912 rmr
= rxe_pool_get_index(&rxe
->mr_pool
,
913 wc
->ex
.invalidate_rkey
>> 8);
914 if (unlikely(!rmr
)) {
915 pr_err("Bad rkey %#x invalidation\n",
916 wc
->ex
.invalidate_rkey
);
919 rmr
->state
= RXE_MEM_STATE_FREE
;
925 if (pkt
->mask
& RXE_DETH_MASK
)
926 wc
->src_qp
= deth_sqp(pkt
);
928 wc
->port_num
= qp
->attr
.port_num
;
932 /* have copy for srq and reference for !srq */
934 advance_consumer(qp
->rq
.queue
);
938 if (rxe_cq_post(qp
->rcq
, &cqe
, pkt
? bth_se(pkt
) : 1))
939 return RESPST_ERR_CQ_OVERFLOW
;
941 if (qp
->resp
.state
== QP_STATE_ERROR
)
942 return RESPST_CHK_RESOURCE
;
946 else if (qp_type(qp
) == IB_QPT_RC
)
947 return RESPST_ACKNOWLEDGE
;
949 return RESPST_CLEANUP
;
/*
 * send_ack - build and transmit an RC acknowledge packet.
 * @qp: queue pair sending the acknowledge
 * @pkt: request packet being acknowledged
 * @syndrome: AETH syndrome to place in the acknowledge
 * @psn: PSN to place in the acknowledge
 *
 * Builds an IB_OPCODE_RC_ACKNOWLEDGE packet with no payload through
 * prepare_ack_packet() (NULL is passed for its last argument) and hands it
 * to rxe_xmit_packet(). A rate-limited "Failed sending ack" message is the
 * visible error report.
 *
 * NOTE(review): the tests on skb and err and the return value are on lines
 * missing from this listing.
 */
952 static int send_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
953 u8 syndrome
, u32 psn
)
956 struct rxe_pkt_info ack_pkt
;
958 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
960 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
, IB_OPCODE_RC_ACKNOWLEDGE
,
961 0, psn
, syndrome
, NULL
);
967 err
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb
);
969 pr_err_ratelimited("Failed sending ack\n");
/*
 * send_atomic_ack - build, remember and transmit an atomic acknowledge.
 * @qp: queue pair sending the acknowledge
 * @pkt: atomic request packet being acknowledged (its PSN is reused)
 *
 * Builds an IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE packet through
 * prepare_ack_packet(). The responder resource slot at
 * qp->resp.resources[qp->resp.res_head] is recycled
 * (free_rd_atomic_resource + rxe_advance_resp_resource). ack_pkt is copied
 * into the skb control block (SKB_TO_PKT(skb)) and the rest of skb->cb is
 * zeroed. The slot is typed RXE_ATOMIC_MASK and records the skb together
 * with first/last/cur PSN, which is what duplicate_request() resends from
 * res->atomic.skb. Finally the packet goes to rxe_xmit_packet(); a
 * rate-limited "Failed sending ack" message is the visible error report.
 *
 * NOTE(review): the remaining parameter (callers pass a syndrome), the
 * tests on skb and rc, any extra reference taken on the skb, and the return
 * value are on lines missing from this listing.
 */
975 static int send_atomic_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
979 struct rxe_pkt_info ack_pkt
;
981 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
982 struct resp_res
*res
;
984 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
,
985 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE
, 0, pkt
->psn
,
994 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
995 free_rd_atomic_resource(qp
, res
);
996 rxe_advance_resp_resource(qp
);
998 memcpy(SKB_TO_PKT(skb
), &ack_pkt
, sizeof(ack_pkt
));
999 memset((unsigned char *)SKB_TO_PKT(skb
) + sizeof(ack_pkt
), 0,
1000 sizeof(skb
->cb
) - sizeof(ack_pkt
));
1003 res
->type
= RXE_ATOMIC_MASK
;
1004 res
->atomic
.skb
= skb
;
1005 res
->first_psn
= ack_pkt
.psn
;
1006 res
->last_psn
= ack_pkt
.psn
;
1007 res
->cur_psn
= ack_pkt
.psn
;
1009 rc
= rxe_xmit_packet(rxe
, qp
, &ack_pkt
, skb
);
1011 pr_err_ratelimited("Failed sending ack\n");
/*
 * acknowledge - send whatever acknowledge the processed request calls for.
 * @qp: queue pair
 * @pkt: request packet that was processed
 *
 * Only RC QPs acknowledge; any other QP type goes straight to
 * RESPST_CLEANUP. Priority of the visible choices:
 *  1. qp->resp.aeth_syndrome differs from AETH_ACK_UNLIMITED: send_ack()
 *     with that syndrome;
 *  2. atomic request: send_atomic_ack();
 *  3. bth_ack(pkt) is set: plain send_ack() with AETH_ACK_UNLIMITED.
 * The helpers' return values are ignored and the next state is always
 * RESPST_CLEANUP.
 */
1018 static enum resp_states
acknowledge(struct rxe_qp
*qp
,
1019 struct rxe_pkt_info
*pkt
)
1021 if (qp_type(qp
) != IB_QPT_RC
)
1022 return RESPST_CLEANUP
;
1024 if (qp
->resp
.aeth_syndrome
!= AETH_ACK_UNLIMITED
)
1025 send_ack(qp
, pkt
, qp
->resp
.aeth_syndrome
, pkt
->psn
);
1026 else if (pkt
->mask
& RXE_ATOMIC_MASK
)
1027 send_atomic_ack(qp
, pkt
, AETH_ACK_UNLIMITED
);
1028 else if (bth_ack(pkt
))
1029 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, pkt
->psn
);
1031 return RESPST_CLEANUP
;
/*
 * cleanup - release the request packet that has just been handled.
 * @qp: queue pair
 * @pkt: request packet (not referenced by the visible statements)
 *
 * Visible work: an skb is dequeued from qp->req_pkts and the reference on
 * qp->resp.mr is dropped with rxe_drop_ref().
 *
 * NOTE(review): the tests around both statements, the release of the skb
 * and the returned state are on lines missing from this listing.
 */
1034 static enum resp_states
cleanup(struct rxe_qp
*qp
,
1035 struct rxe_pkt_info
*pkt
)
1037 struct sk_buff
*skb
;
1040 skb
= skb_dequeue(&qp
->req_pkts
);
1046 rxe_drop_ref(qp
->resp
.mr
);
/*
 * find_resource - look up the responder resource covering a PSN.
 * @qp: queue pair whose qp->resp.resources[] array is searched
 * @psn: PSN of the (duplicate) request
 *
 * Scans indices 0 .. qp->attr.max_dest_rd_atomic - 1 and tests whether psn
 * lies in [res->first_psn, res->last_psn] using psn_compare().
 *
 * NOTE(review): the return statements and any additional filter inside the
 * loop are on lines missing from this listing; presumably the matching
 * entry is returned, or NULL when none matches - confirm.
 */
1053 static struct resp_res
*find_resource(struct rxe_qp
*qp
, u32 psn
)
1057 for (i
= 0; i
< qp
->attr
.max_dest_rd_atomic
; i
++) {
1058 struct resp_res
*res
= &qp
->resp
.resources
[i
];
1063 if (psn_compare(psn
, res
->first_psn
) >= 0 &&
1064 psn_compare(psn
, res
->last_psn
) <= 0) {
/*
 * duplicate_request - handle a request whose PSN was already processed.
 * @qp: queue pair
 * @pkt: the duplicate request packet
 *
 * Send/write duplicates: acknowledge again with AETH_ACK_UNLIMITED at
 * prev_psn, i.e. (qp->resp.ack_psn - 1) & BTH_PSN_MASK, then
 * RESPST_CLEANUP.
 *
 * Read duplicates: the matching resource is looked up with
 * find_resource(). The new request must lie inside the original one
 * (iova not below va_org, length not above the original length, end not
 * past the original end) and carry the same rkey; the visible result for
 * each failed check is RESPST_CLEANUP. Otherwise cur_psn is set to the
 * packet PSN, the state to rdatm_res_state_new when the PSN equals
 * first_psn (else rdatm_res_state_replay), va_org/va/resid are reset from
 * the packet, and the next state is RESPST_READ_REPLY.
 *
 * Remaining (atomic) duplicates: the stored reply skb is looked up with
 * find_resource(), an extra reference is taken with skb_get(), and it is
 * resent with rxe_xmit_packet(); the next state is RESPST_CLEANUP.
 *
 * NOTE(review): the tests on res and rc, the gotos between the checks, the
 * store into qp->resp.res and the final return are on lines missing from
 * this listing.
 */
1072 static enum resp_states
duplicate_request(struct rxe_qp
*qp
,
1073 struct rxe_pkt_info
*pkt
)
1075 enum resp_states rc
;
1076 u32 prev_psn
= (qp
->resp
.ack_psn
- 1) & BTH_PSN_MASK
;
1078 if (pkt
->mask
& RXE_SEND_MASK
||
1079 pkt
->mask
& RXE_WRITE_MASK
) {
1080 /* SEND. Ack again and cleanup. C9-105. */
1082 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, prev_psn
);
1083 rc
= RESPST_CLEANUP
;
1085 } else if (pkt
->mask
& RXE_READ_MASK
) {
1086 struct resp_res
*res
;
1088 res
= find_resource(qp
, pkt
->psn
);
1090 /* Resource not found. Class D error. Drop the
1093 rc
= RESPST_CLEANUP
;
1096 /* Ensure this new request is the same as the previous
1097 * one or a subset of it.
1099 u64 iova
= reth_va(pkt
);
1100 u32 resid
= reth_len(pkt
);
1102 if (iova
< res
->read
.va_org
||
1103 resid
> res
->read
.length
||
1104 (iova
+ resid
) > (res
->read
.va_org
+
1105 res
->read
.length
)) {
1106 rc
= RESPST_CLEANUP
;
1110 if (reth_rkey(pkt
) != res
->read
.rkey
) {
1111 rc
= RESPST_CLEANUP
;
1115 res
->cur_psn
= pkt
->psn
;
1116 res
->state
= (pkt
->psn
== res
->first_psn
) ?
1117 rdatm_res_state_new
:
1118 rdatm_res_state_replay
;
1121 /* Reset the resource, except length. */
1122 res
->read
.va_org
= iova
;
1123 res
->read
.va
= iova
;
1124 res
->read
.resid
= resid
;
1126 /* Replay the RDMA read reply. */
1128 rc
= RESPST_READ_REPLY
;
1132 struct resp_res
*res
;
1134 /* Find the operation in our list of responder resources. */
1135 res
= find_resource(qp
, pkt
->psn
);
1137 skb_get(res
->atomic
.skb
);
1138 /* Resend the result. */
1139 rc
= rxe_xmit_packet(to_rdev(qp
->ibqp
.device
), qp
,
1140 pkt
, res
->atomic
.skb
);
1142 pr_err("Failed resending result. This flow is not handled - skb ignored\n");
1143 rc
= RESPST_CLEANUP
;
1148 /* Resource not found. Class D error. Drop the request. */
1149 rc
= RESPST_CLEANUP
;
1156 /* Process a class A or C. Both are treated the same in this implementation. */
/*
 * do_class_ac_error - record a class A/C error on the queue pair.
 * @qp: queue pair
 * @syndrome: AETH syndrome stored in qp->resp.aeth_syndrome
 * @status: completion status stored in qp->resp.status
 *
 * Also sets qp->resp.goto_error, which rxe_responder() checks in its
 * RESPST_DONE and RESPST_EXIT cases to divert to RESPST_ERROR.
 */
1157 static void do_class_ac_error(struct rxe_qp
*qp
, u8 syndrome
,
1158 enum ib_wc_status status
)
1160 qp
->resp
.aeth_syndrome
= syndrome
;
1161 qp
->resp
.status
= status
;
1163 /* indicate that we should go through the ERROR state */
1164 qp
->resp
.goto_error
= 1;
/*
 * do_class_d1e_error - handle a class D1/E sequencing error.
 * @qp: queue pair
 *
 * First visible part: qp->resp.drop_msg is set; one path stores
 * IB_WC_REM_INV_REQ_ERR in qp->resp.status and returns RESPST_COMPLETE,
 * the other returns RESPST_CLEANUP.
 *
 * Second visible part (class D1): the current receive WQE is rewound
 * (dma.resid = dma.length, cur_sge = 0, sge_offset = 0), qp->resp.opcode is
 * set to -1, the reference on qp->resp.mr is dropped, and RESPST_CLEANUP is
 * returned.
 *
 * NOTE(review): the conditions that select between these parts and paths
 * (presumably on qp->srq and on qp->resp.wqe / qp->resp.mr) are on lines
 * missing from this listing - confirm.
 */
1167 static enum resp_states
do_class_d1e_error(struct rxe_qp
*qp
)
1172 qp
->resp
.drop_msg
= 1;
1174 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1175 return RESPST_COMPLETE
;
1177 return RESPST_CLEANUP
;
1180 /* Class D1. This packet may be the start of a
1181 * new message and could be valid. The previous
1182 * message is invalid and ignored. reset the
1183 * recv wr to its original state
1186 qp
->resp
.wqe
->dma
.resid
= qp
->resp
.wqe
->dma
.length
;
1187 qp
->resp
.wqe
->dma
.cur_sge
= 0;
1188 qp
->resp
.wqe
->dma
.sge_offset
= 0;
1189 qp
->resp
.opcode
= -1;
1193 rxe_drop_ref(qp
->resp
.mr
);
1197 return RESPST_CLEANUP
;
/*
 * rxe_drain_req_pkts - empty the request packet queue and the receive queue.
 * @qp: queue pair being drained
 * @notify: not referenced by the visible statements
 *
 * First loop: dequeues every skb from qp->req_pkts until the queue is
 * empty. Second loop: for a QP without an SRQ that has a receive queue,
 * advances the consumer index until queue_head() reports no more entries.
 *
 * NOTE(review): the body of the first loop (what is done with each skb) and
 * any use of @notify are on lines missing from this listing.
 */
1201 static void rxe_drain_req_pkts(struct rxe_qp
*qp
, bool notify
)
1203 struct sk_buff
*skb
;
1205 while ((skb
= skb_dequeue(&qp
->req_pkts
))) {
1213 while (!qp
->srq
&& qp
->rq
.queue
&& queue_head(qp
->rq
.queue
))
1214 advance_consumer(qp
->rq
.queue
);
1217 int rxe_responder(void *arg
)
1219 struct rxe_qp
*qp
= (struct rxe_qp
*)arg
;
1220 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
1221 enum resp_states state
;
1222 struct rxe_pkt_info
*pkt
= NULL
;
1227 qp
->resp
.aeth_syndrome
= AETH_ACK_UNLIMITED
;
1234 switch (qp
->resp
.state
) {
1235 case QP_STATE_RESET
:
1236 state
= RESPST_RESET
;
1240 state
= RESPST_GET_REQ
;
1245 pr_debug("qp#%d state = %s\n", qp_num(qp
),
1246 resp_state_name
[state
]);
1248 case RESPST_GET_REQ
:
1249 state
= get_req(qp
, &pkt
);
1251 case RESPST_CHK_PSN
:
1252 state
= check_psn(qp
, pkt
);
1254 case RESPST_CHK_OP_SEQ
:
1255 state
= check_op_seq(qp
, pkt
);
1257 case RESPST_CHK_OP_VALID
:
1258 state
= check_op_valid(qp
, pkt
);
1260 case RESPST_CHK_RESOURCE
:
1261 state
= check_resource(qp
, pkt
);
1263 case RESPST_CHK_LENGTH
:
1264 state
= check_length(qp
, pkt
);
1266 case RESPST_CHK_RKEY
:
1267 state
= check_rkey(qp
, pkt
);
1269 case RESPST_EXECUTE
:
1270 state
= execute(qp
, pkt
);
1272 case RESPST_COMPLETE
:
1273 state
= do_complete(qp
, pkt
);
1275 case RESPST_READ_REPLY
:
1276 state
= read_reply(qp
, pkt
);
1278 case RESPST_ACKNOWLEDGE
:
1279 state
= acknowledge(qp
, pkt
);
1281 case RESPST_CLEANUP
:
1282 state
= cleanup(qp
, pkt
);
1284 case RESPST_DUPLICATE_REQUEST
:
1285 state
= duplicate_request(qp
, pkt
);
1287 case RESPST_ERR_PSN_OUT_OF_SEQ
:
1288 /* RC only - Class B. Drop packet. */
1289 send_ack(qp
, pkt
, AETH_NAK_PSN_SEQ_ERROR
, qp
->resp
.psn
);
1290 state
= RESPST_CLEANUP
;
1293 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
:
1294 case RESPST_ERR_MISSING_OPCODE_FIRST
:
1295 case RESPST_ERR_MISSING_OPCODE_LAST_C
:
1296 case RESPST_ERR_UNSUPPORTED_OPCODE
:
1297 case RESPST_ERR_MISALIGNED_ATOMIC
:
1298 /* RC Only - Class C. */
1299 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1300 IB_WC_REM_INV_REQ_ERR
);
1301 state
= RESPST_COMPLETE
;
1304 case RESPST_ERR_MISSING_OPCODE_LAST_D1E
:
1305 state
= do_class_d1e_error(qp
);
1307 case RESPST_ERR_RNR
:
1308 if (qp_type(qp
) == IB_QPT_RC
) {
1309 rxe_counter_inc(rxe
, RXE_CNT_SND_RNR
);
1311 send_ack(qp
, pkt
, AETH_RNR_NAK
|
1313 qp
->attr
.min_rnr_timer
),
1316 /* UD/UC - class D */
1317 qp
->resp
.drop_msg
= 1;
1319 state
= RESPST_CLEANUP
;
1322 case RESPST_ERR_RKEY_VIOLATION
:
1323 if (qp_type(qp
) == IB_QPT_RC
) {
1325 do_class_ac_error(qp
, AETH_NAK_REM_ACC_ERR
,
1326 IB_WC_REM_ACCESS_ERR
);
1327 state
= RESPST_COMPLETE
;
1329 qp
->resp
.drop_msg
= 1;
1331 /* UC/SRQ Class D */
1332 qp
->resp
.status
= IB_WC_REM_ACCESS_ERR
;
1333 state
= RESPST_COMPLETE
;
1335 /* UC/non-SRQ Class E. */
1336 state
= RESPST_CLEANUP
;
1341 case RESPST_ERR_LENGTH
:
1342 if (qp_type(qp
) == IB_QPT_RC
) {
1344 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1345 IB_WC_REM_INV_REQ_ERR
);
1346 state
= RESPST_COMPLETE
;
1347 } else if (qp
->srq
) {
1348 /* UC/UD - class E */
1349 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1350 state
= RESPST_COMPLETE
;
1352 /* UC/UD - class D */
1353 qp
->resp
.drop_msg
= 1;
1354 state
= RESPST_CLEANUP
;
1358 case RESPST_ERR_MALFORMED_WQE
:
1360 do_class_ac_error(qp
, AETH_NAK_REM_OP_ERR
,
1361 IB_WC_LOC_QP_OP_ERR
);
1362 state
= RESPST_COMPLETE
;
1365 case RESPST_ERR_CQ_OVERFLOW
:
1367 state
= RESPST_ERROR
;
1371 if (qp
->resp
.goto_error
) {
1372 state
= RESPST_ERROR
;
1379 if (qp
->resp
.goto_error
) {
1380 state
= RESPST_ERROR
;
1387 rxe_drain_req_pkts(qp
, false);
1388 qp
->resp
.wqe
= NULL
;
1392 qp
->resp
.goto_error
= 0;
1393 pr_warn("qp#%d moved to error state\n", qp_num(qp
));