/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

enum resp_states {
        RESPST_NONE,
        RESPST_GET_REQ,
        RESPST_CHK_PSN,
        RESPST_CHK_OP_SEQ,
        RESPST_CHK_OP_VALID,
        RESPST_CHK_RESOURCE,
        RESPST_CHK_LENGTH,
        RESPST_CHK_RKEY,
        RESPST_EXECUTE,
        RESPST_READ_REPLY,
        RESPST_COMPLETE,
        RESPST_ACKNOWLEDGE,
        RESPST_CLEANUP,
        RESPST_DUPLICATE_REQUEST,
        RESPST_ERR_MALFORMED_WQE,
        RESPST_ERR_UNSUPPORTED_OPCODE,
        RESPST_ERR_MISALIGNED_ATOMIC,
        RESPST_ERR_PSN_OUT_OF_SEQ,
        RESPST_ERR_MISSING_OPCODE_FIRST,
        RESPST_ERR_MISSING_OPCODE_LAST_C,
        RESPST_ERR_MISSING_OPCODE_LAST_D1E,
        RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
        RESPST_ERR_RNR,
        RESPST_ERR_RKEY_VIOLATION,
        RESPST_ERR_LENGTH,
        RESPST_ERR_CQ_OVERFLOW,
        RESPST_ERROR,
        RESPST_RESET,
        RESPST_DONE,
        RESPST_EXIT,
};

static char *resp_state_name[] = {
        [RESPST_NONE] = "NONE",
        [RESPST_GET_REQ] = "GET_REQ",
        [RESPST_CHK_PSN] = "CHK_PSN",
        [RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
        [RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
        [RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
        [RESPST_CHK_LENGTH] = "CHK_LENGTH",
        [RESPST_CHK_RKEY] = "CHK_RKEY",
        [RESPST_EXECUTE] = "EXECUTE",
        [RESPST_READ_REPLY] = "READ_REPLY",
        [RESPST_COMPLETE] = "COMPLETE",
        [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
        [RESPST_CLEANUP] = "CLEANUP",
        [RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
        [RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
        [RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
        [RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
        [RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
        [RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
        [RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
        [RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
        [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
        [RESPST_ERR_RNR] = "ERR_RNR",
        [RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
        [RESPST_ERR_LENGTH] = "ERR_LENGTH",
        [RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
        [RESPST_ERROR] = "ERROR",
        [RESPST_RESET] = "RESET",
        [RESPST_DONE] = "DONE",
        [RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
        int must_sched;
        struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

        skb_queue_tail(&qp->req_pkts, skb);

        must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
                     (skb_queue_len(&qp->req_pkts) > 1);

        rxe_run_task(&qp->resp.task, must_sched);
}

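/*
 * Fetch the next request packet to work on. In the ERROR state all
 * queued packets are dropped and we fall through to flush the receive
 * queue; otherwise the head of req_pkts is only peeked, not dequeued,
 * so cleanup() can free it later. A non-NULL qp->resp.res means a read
 * reply is still in progress and is resumed first.
 */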
static inline enum resp_states get_req(struct rxe_qp *qp,
                                       struct rxe_pkt_info **pkt_p)
{
        struct sk_buff *skb;

        if (qp->resp.state == QP_STATE_ERROR) {
                while ((skb = skb_dequeue(&qp->req_pkts))) {
                        rxe_drop_ref(qp);
                        kfree_skb(skb);
                }

                /* go drain recv wr queue */
                return RESPST_CHK_RESOURCE;
        }

        skb = skb_peek(&qp->req_pkts);
        if (!skb)
                return RESPST_EXIT;

        *pkt_p = SKB_TO_PKT(skb);

        return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

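/*
 * Validate the packet PSN against the PSN the responder expects next.
 * For RC, a PSN ahead of the expected one is NAKed (once) as out of
 * sequence and an earlier PSN is treated as a duplicate; for UC the
 * remainder of the current message is simply dropped.
 */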
static enum resp_states check_psn(struct rxe_qp *qp,
                                  struct rxe_pkt_info *pkt)
{
        int diff = psn_compare(pkt->psn, qp->resp.psn);
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

        switch (qp_type(qp)) {
        case IB_QPT_RC:
                if (diff > 0) {
                        if (qp->resp.sent_psn_nak)
                                return RESPST_CLEANUP;

                        qp->resp.sent_psn_nak = 1;
                        rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
                        return RESPST_ERR_PSN_OUT_OF_SEQ;

                } else if (diff < 0) {
                        rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
                        return RESPST_DUPLICATE_REQUEST;
                }

                if (qp->resp.sent_psn_nak)
                        qp->resp.sent_psn_nak = 0;

                break;

        case IB_QPT_UC:
                if (qp->resp.drop_msg || diff != 0) {
                        if (pkt->mask & RXE_START_MASK) {
                                qp->resp.drop_msg = 0;
                                return RESPST_CHK_OP_SEQ;
                        }

                        qp->resp.drop_msg = 1;
                        return RESPST_CLEANUP;
                }
                break;

        default:
                break;
        }

        return RESPST_CHK_OP_SEQ;
}

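/*
 * Check that the opcode follows the FIRST/MIDDLE/LAST sequencing rules
 * for a multi-packet message, based on the previously received opcode
 * stored in qp->resp.opcode.
 */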
static enum resp_states check_op_seq(struct rxe_qp *qp,
                                     struct rxe_pkt_info *pkt)
{
        switch (qp_type(qp)) {
        case IB_QPT_RC:
                switch (qp->resp.opcode) {
                case IB_OPCODE_RC_SEND_FIRST:
                case IB_OPCODE_RC_SEND_MIDDLE:
                        switch (pkt->opcode) {
                        case IB_OPCODE_RC_SEND_MIDDLE:
                        case IB_OPCODE_RC_SEND_LAST:
                        case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
                        case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
                                return RESPST_CHK_OP_VALID;
                        default:
                                return RESPST_ERR_MISSING_OPCODE_LAST_C;
                        }

                case IB_OPCODE_RC_RDMA_WRITE_FIRST:
                case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
                        switch (pkt->opcode) {
                        case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
                        case IB_OPCODE_RC_RDMA_WRITE_LAST:
                        case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                                return RESPST_CHK_OP_VALID;
                        default:
                                return RESPST_ERR_MISSING_OPCODE_LAST_C;
                        }

                default:
                        switch (pkt->opcode) {
                        case IB_OPCODE_RC_SEND_MIDDLE:
                        case IB_OPCODE_RC_SEND_LAST:
                        case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
                        case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
                        case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
                        case IB_OPCODE_RC_RDMA_WRITE_LAST:
                        case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                                return RESPST_ERR_MISSING_OPCODE_FIRST;
                        default:
                                return RESPST_CHK_OP_VALID;
                        }
                }
                break;

        case IB_QPT_UC:
                switch (qp->resp.opcode) {
                case IB_OPCODE_UC_SEND_FIRST:
                case IB_OPCODE_UC_SEND_MIDDLE:
                        switch (pkt->opcode) {
                        case IB_OPCODE_UC_SEND_MIDDLE:
                        case IB_OPCODE_UC_SEND_LAST:
                        case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
                                return RESPST_CHK_OP_VALID;
                        default:
                                return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
                        }

                case IB_OPCODE_UC_RDMA_WRITE_FIRST:
                case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
                        switch (pkt->opcode) {
                        case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
                        case IB_OPCODE_UC_RDMA_WRITE_LAST:
                        case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                                return RESPST_CHK_OP_VALID;
                        default:
                                return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
                        }

                default:
                        switch (pkt->opcode) {
                        case IB_OPCODE_UC_SEND_MIDDLE:
                        case IB_OPCODE_UC_SEND_LAST:
                        case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
                        case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
                        case IB_OPCODE_UC_RDMA_WRITE_LAST:
                        case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
                                qp->resp.drop_msg = 1;
                                return RESPST_CLEANUP;
                        default:
                                return RESPST_CHK_OP_VALID;
                        }
                }
                break;

        default:
                return RESPST_CHK_OP_VALID;
        }
}

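/*
 * Verify that the QP was configured with the remote access rights
 * (read/write/atomic) that this opcode requires.
 */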
static enum resp_states check_op_valid(struct rxe_qp *qp,
                                       struct rxe_pkt_info *pkt)
{
        switch (qp_type(qp)) {
        case IB_QPT_RC:
                if (((pkt->mask & RXE_READ_MASK) &&
                     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
                    ((pkt->mask & RXE_WRITE_MASK) &&
                     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
                    ((pkt->mask & RXE_ATOMIC_MASK) &&
                     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
                        return RESPST_ERR_UNSUPPORTED_OPCODE;
                }
                break;

        case IB_QPT_UC:
                if ((pkt->mask & RXE_WRITE_MASK) &&
                    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
                        qp->resp.drop_msg = 1;
                        return RESPST_CLEANUP;
                }
                break;

        default:
                break;
        }

        return RESPST_CHK_RESOURCE;
}

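/*
 * Take the next receive WQE from the shared receive queue and copy it
 * into the QP so it can be consumed like an ordinary recv WQE. The SRQ
 * limit event is raised when the queue drops below the armed limit.
 */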
static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
        struct rxe_srq *srq = qp->srq;
        struct rxe_queue *q = srq->rq.queue;
        struct rxe_recv_wqe *wqe;
        struct ib_event ev;

        if (srq->error)
                return RESPST_ERR_RNR;

        spin_lock_bh(&srq->rq.consumer_lock);

        wqe = queue_head(q);
        if (!wqe) {
                spin_unlock_bh(&srq->rq.consumer_lock);
                return RESPST_ERR_RNR;
        }

        /* note kernel and user space recv wqes have same size */
        memcpy(&qp->resp.srq_wqe, wqe, sizeof(qp->resp.srq_wqe));

        qp->resp.wqe = &qp->resp.srq_wqe.wqe;
        advance_consumer(q);

        if (srq->limit && srq->ibsrq.event_handler &&
            (queue_count(q) < srq->limit)) {
                srq->limit = 0;
                goto event;
        }

        spin_unlock_bh(&srq->rq.consumer_lock);
        return RESPST_CHK_LENGTH;

event:
        spin_unlock_bh(&srq->rq.consumer_lock);
        ev.device = qp->ibqp.device;
        ev.element.srq = qp->ibqp.srq;
        ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
        srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
        return RESPST_CHK_LENGTH;
}

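/*
 * Make sure a resource is available to service the request: a
 * responder read/atomic resource for RDMA READ and atomic operations,
 * or a receive WQE (from the RQ or SRQ) for operations that consume
 * one. In the ERROR state the receive queue is flushed instead.
 */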
static enum resp_states check_resource(struct rxe_qp *qp,
                                       struct rxe_pkt_info *pkt)
{
        struct rxe_srq *srq = qp->srq;

        if (qp->resp.state == QP_STATE_ERROR) {
                if (qp->resp.wqe) {
                        qp->resp.status = IB_WC_WR_FLUSH_ERR;
                        return RESPST_COMPLETE;
                } else if (!srq) {
                        qp->resp.wqe = queue_head(qp->rq.queue);
                        if (qp->resp.wqe) {
                                qp->resp.status = IB_WC_WR_FLUSH_ERR;
                                return RESPST_COMPLETE;
                        }
                }

                return RESPST_EXIT;
        }

        if (pkt->mask & RXE_READ_OR_ATOMIC) {
                /* it is the requester's job to not send
                 * too many read/atomic ops, we just
                 * recycle the responder resource queue
                 */
                if (likely(qp->attr.max_dest_rd_atomic > 0))
                        return RESPST_CHK_LENGTH;
                else
                        return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
        }

        if (pkt->mask & RXE_RWR_MASK) {
                if (srq)
                        return get_srq_wqe(qp);

                qp->resp.wqe = queue_head(qp->rq.queue);
                return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
        }

        return RESPST_CHK_LENGTH;
}

static enum resp_states check_length(struct rxe_qp *qp,
                                     struct rxe_pkt_info *pkt)
{
        switch (qp_type(qp)) {
        case IB_QPT_RC:
                return RESPST_CHK_RKEY;

        case IB_QPT_UC:
                return RESPST_CHK_RKEY;

        default:
                return RESPST_CHK_RKEY;
        }
}

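/*
 * Translate the rkey carried in the request into a memory region and
 * validate access rights, address range and payload length/padding
 * before any data is moved.
 */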
static enum resp_states check_rkey(struct rxe_qp *qp,
                                   struct rxe_pkt_info *pkt)
{
        struct rxe_mem *mem = NULL;
        u64 va;
        u32 rkey;
        u32 resid;
        u32 pktlen;
        int mtu = qp->mtu;
        enum resp_states state;
        int access;

        if (pkt->mask & (RXE_READ_MASK | RXE_WRITE_MASK)) {
                if (pkt->mask & RXE_RETH_MASK) {
                        qp->resp.va = reth_va(pkt);
                        qp->resp.rkey = reth_rkey(pkt);
                        qp->resp.resid = reth_len(pkt);
                        qp->resp.length = reth_len(pkt);
                }
                access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
                                                     : IB_ACCESS_REMOTE_WRITE;
        } else if (pkt->mask & RXE_ATOMIC_MASK) {
                qp->resp.va = atmeth_va(pkt);
                qp->resp.rkey = atmeth_rkey(pkt);
                qp->resp.resid = sizeof(u64);
                access = IB_ACCESS_REMOTE_ATOMIC;
        } else {
                return RESPST_EXECUTE;
        }

        /* A zero-byte op is not required to set an addr or rkey. */
        if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
            (pkt->mask & RXE_RETH_MASK) &&
            reth_len(pkt) == 0) {
                return RESPST_EXECUTE;
        }

        va = qp->resp.va;
        rkey = qp->resp.rkey;
        resid = qp->resp.resid;
        pktlen = payload_size(pkt);

        mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
        if (!mem) {
                state = RESPST_ERR_RKEY_VIOLATION;
                goto err;
        }

        if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
                state = RESPST_ERR_RKEY_VIOLATION;
                goto err;
        }

        if (mem_check_range(mem, va, resid)) {
                state = RESPST_ERR_RKEY_VIOLATION;
                goto err;
        }

        if (pkt->mask & RXE_WRITE_MASK) {
                if (resid > mtu) {
                        if (pktlen != mtu || bth_pad(pkt)) {
                                state = RESPST_ERR_LENGTH;
                                goto err;
                        }
                } else {
                        if (pktlen != resid) {
                                state = RESPST_ERR_LENGTH;
                                goto err;
                        }
                        if ((bth_pad(pkt) != (0x3 & (-resid)))) {
                                /* This case may not be exactly that
                                 * but nothing else fits.
                                 */
                                state = RESPST_ERR_LENGTH;
                                goto err;
                        }
                }
        }

        WARN_ON_ONCE(qp->resp.mr);

        qp->resp.mr = mem;
        return RESPST_EXECUTE;

err:
        if (mem)
                rxe_drop_ref(mem);
        return state;
}

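/*
 * send_data_in() copies inbound SEND payload into the current receive
 * WQE through its DMA state; write_data_in() below copies RDMA WRITE
 * payload directly into the target MR at qp->resp.va.
 */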
static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
                                     int data_len)
{
        int err;

        err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
                        data_addr, data_len, to_mem_obj, NULL);
        if (unlikely(err))
                return (err == -ENOSPC) ? RESPST_ERR_LENGTH
                                        : RESPST_ERR_MALFORMED_WQE;

        return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
                                      struct rxe_pkt_info *pkt)
{
        enum resp_states rc = RESPST_NONE;
        int err;
        int data_len = payload_size(pkt);

        err = rxe_mem_copy(qp->resp.mr, qp->resp.va, payload_addr(pkt),
                           data_len, to_mem_obj, NULL);
        if (err) {
                rc = RESPST_ERR_RKEY_VIOLATION;
                goto out;
        }

        qp->resp.va += data_len;
        qp->resp.resid -= data_len;

out:
        return rc;
}

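/*
 * process_atomic() below performs the compare&swap or fetch&add on the
 * target memory while holding atomic_ops_lock and saves the original
 * value in qp->resp.atomic_orig so it can be returned in the ATOMIC ACK.
 */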
/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);

static enum resp_states process_atomic(struct rxe_qp *qp,
                                       struct rxe_pkt_info *pkt)
{
        u64 iova = atmeth_va(pkt);
        u64 *vaddr;
        enum resp_states ret;
        struct rxe_mem *mr = qp->resp.mr;

        if (mr->state != RXE_MEM_STATE_VALID) {
                ret = RESPST_ERR_RKEY_VIOLATION;
                goto out;
        }

        vaddr = iova_to_vaddr(mr, iova, sizeof(u64));

        /* check that vaddr is 8-byte aligned. */
        if (!vaddr || (uintptr_t)vaddr & 7) {
                ret = RESPST_ERR_MISALIGNED_ATOMIC;
                goto out;
        }

        spin_lock_bh(&atomic_ops_lock);

        qp->resp.atomic_orig = *vaddr;

        if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
            pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
                if (*vaddr == atmeth_comp(pkt))
                        *vaddr = atmeth_swap_add(pkt);
        } else {
                *vaddr += atmeth_swap_add(pkt);
        }

        spin_unlock_bh(&atomic_ops_lock);

        ret = RESPST_NONE;
out:
        return ret;
}

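/*
 * Build an outgoing ACK/response packet. The BTH is cloned from the
 * request packet and then patched; AETH and ATMACK headers are filled
 * in when the opcode carries them. If the caller passes a crcp pointer
 * it takes over ICRC computation (read_reply() folds the payload into
 * the CRC itself), otherwise the ICRC is finalized here.
 */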
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
                                          struct rxe_pkt_info *pkt,
                                          struct rxe_pkt_info *ack,
                                          int opcode,
                                          int payload,
                                          u32 psn,
                                          u8 syndrome,
                                          u32 *crcp)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct sk_buff *skb;
        u32 crc = 0;
        u32 *p;
        int paylen;
        int pad;
        int err;

        pad = (-payload) & 0x3;
        paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

        skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
        if (!skb)
                return NULL;

        ack->opcode = opcode;
        ack->mask = rxe_opcode[opcode].mask;
        ack->offset = pkt->offset;
        ack->paylen = paylen;
        ack->psn = psn;

        /* fill in bth using the request packet headers */
        memcpy(ack->hdr, pkt->hdr, pkt->offset + RXE_BTH_BYTES);

        bth_set_opcode(ack, opcode);
        bth_set_qpn(ack, qp->attr.dest_qp_num);
        bth_set_pad(ack, pad);
        bth_set_psn(ack, psn);

        if (ack->mask & RXE_AETH_MASK) {
                aeth_set_syn(ack, syndrome);
                aeth_set_msn(ack, qp->resp.msn);
        }

        if (ack->mask & RXE_ATMACK_MASK)
                atmack_set_orig(ack, qp->resp.atomic_orig);

        err = rxe_prepare(ack, skb, &crc);
        if (err) {
                kfree_skb(skb);
                return NULL;
        }

        if (crcp) {
                /* CRC computation will be continued by the caller */
                *crcp = crc;
        } else {
                p = payload_addr(ack) + payload + bth_pad(ack);
                *p = ~crc;
        }

        return skb;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
                                   struct rxe_pkt_info *req_pkt)
{
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;
        int mtu = qp->mtu;
        enum resp_states state;
        int payload;
        int opcode;
        int err;
        struct resp_res *res = qp->resp.res;
        u32 icrc;
        u32 *p;

        if (!res) {
                /* This is the first time we process this request. Get a
                 * resource.
                 */
                res = &qp->resp.resources[qp->resp.res_head];

                free_rd_atomic_resource(qp, res);
                rxe_advance_resp_resource(qp);

                res->type = RXE_READ_MASK;

                res->read.va = qp->resp.va;
                res->read.va_org = qp->resp.va;

                res->first_psn = req_pkt->psn;

                if (reth_len(req_pkt)) {
                        res->last_psn = (req_pkt->psn +
                                         (reth_len(req_pkt) + mtu - 1) /
                                         mtu - 1) & BTH_PSN_MASK;
                } else {
                        res->last_psn = res->first_psn;
                }
                res->cur_psn = req_pkt->psn;

                res->read.resid = qp->resp.resid;
                res->read.length = qp->resp.resid;
                res->read.rkey = qp->resp.rkey;

                /* note res inherits the reference to mr from qp */
                res->read.mr = qp->resp.mr;
                qp->resp.mr = NULL;

                qp->resp.res = res;
                res->state = rdatm_res_state_new;
        }

        if (res->state == rdatm_res_state_new) {
                if (res->read.resid <= mtu)
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
                else
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
        } else {
                if (res->read.resid > mtu)
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
                else
                        opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
        }

        res->state = rdatm_res_state_next;

        payload = min_t(int, res->read.resid, mtu);

        skb = prepare_ack_packet(qp, req_pkt, &ack_pkt, opcode, payload,
                                 res->cur_psn, AETH_ACK_UNLIMITED, &icrc);
        if (!skb)
                return RESPST_ERR_RNR;

        err = rxe_mem_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt),
                           payload, from_mem_obj, &icrc);
        if (err)
                pr_err("Failed copying memory\n");

        if (bth_pad(&ack_pkt)) {
                struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
                u8 *pad = payload_addr(&ack_pkt) + payload;

                memset(pad, 0, bth_pad(&ack_pkt));
                icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
        }
        p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
        *p = ~icrc;

        err = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (err) {
                pr_err("Failed sending RDMA reply.\n");
                return RESPST_ERR_RNR;
        }

        res->read.va += payload;
        res->read.resid -= payload;
        res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

        if (res->read.resid > 0) {
                state = RESPST_DONE;
        } else {
                qp->resp.res = NULL;
                qp->resp.opcode = -1;
                if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
                        qp->resp.psn = res->cur_psn;
                state = RESPST_CLEANUP;
        }

        return state;
}

static void build_rdma_network_hdr(union rdma_network_hdr *hdr,
                                   struct rxe_pkt_info *pkt)
{
        struct sk_buff *skb = PKT_TO_SKB(pkt);

        memset(hdr, 0, sizeof(*hdr));
        if (skb->protocol == htons(ETH_P_IP))
                memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh));
        else if (skb->protocol == htons(ETH_P_IPV6))
                memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh));
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
        enum resp_states err;

        if (pkt->mask & RXE_SEND_MASK) {
                if (qp_type(qp) == IB_QPT_UD ||
                    qp_type(qp) == IB_QPT_SMI ||
                    qp_type(qp) == IB_QPT_GSI) {
                        union rdma_network_hdr hdr;

                        build_rdma_network_hdr(&hdr, pkt);

                        err = send_data_in(qp, &hdr, sizeof(hdr));
                        if (err)
                                return err;
                }
                err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
                if (err)
                        return err;
        } else if (pkt->mask & RXE_WRITE_MASK) {
                err = write_data_in(qp, pkt);
                if (err)
                        return err;
        } else if (pkt->mask & RXE_READ_MASK) {
                /* For RDMA Read we can increment the msn now. See C9-148. */
                qp->resp.msn++;
                return RESPST_READ_REPLY;
        } else if (pkt->mask & RXE_ATOMIC_MASK) {
                err = process_atomic(qp, pkt);
                if (err)
                        return err;
        }

        /* next expected psn, read handles this separately */
        qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
        qp->resp.ack_psn = qp->resp.psn;

        qp->resp.opcode = pkt->opcode;
        qp->resp.status = IB_WC_SUCCESS;

        if (pkt->mask & RXE_COMP_MASK) {
                /* We successfully processed this new request. */
                qp->resp.msn++;
                return RESPST_COMPLETE;
        } else if (qp_type(qp) == IB_QPT_RC)
                return RESPST_ACKNOWLEDGE;
        else
                return RESPST_CLEANUP;
}

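/*
 * Post a work completion for the consumed receive WQE, translating the
 * packet into either a kernel ib_wc or a user-space ib_uverbs_wc
 * depending on the CQ owner, and decide which state the responder
 * enters next.
 */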
static enum resp_states do_complete(struct rxe_qp *qp,
                                    struct rxe_pkt_info *pkt)
{
        struct rxe_cqe cqe;
        struct ib_wc *wc = &cqe.ibwc;
        struct ib_uverbs_wc *uwc = &cqe.uibwc;
        struct rxe_recv_wqe *wqe = qp->resp.wqe;
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

        if (unlikely(!wqe))
                return RESPST_CLEANUP;

        memset(&cqe, 0, sizeof(cqe));

        if (qp->rcq->is_user) {
                uwc->status = qp->resp.status;
                uwc->qp_num = qp->ibqp.qp_num;
                uwc->wr_id = wqe->wr_id;
        } else {
                wc->status = qp->resp.status;
                wc->qp = &qp->ibqp;
                wc->wr_id = wqe->wr_id;
        }

        if (wc->status == IB_WC_SUCCESS) {
                rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
                wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
                              pkt->mask & RXE_WRITE_MASK) ?
                                        IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
                wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
                                pkt->mask & RXE_WRITE_MASK) ?
                                        qp->resp.length :
                                        wqe->dma.length - wqe->dma.resid;

                /* fields after byte_len are different between kernel and user
                 * space
                 */
                if (qp->rcq->is_user) {
                        uwc->wc_flags = IB_WC_GRH;

                        if (pkt->mask & RXE_IMMDT_MASK) {
                                uwc->wc_flags |= IB_WC_WITH_IMM;
                                uwc->ex.imm_data = immdt_imm(pkt);
                        }

                        if (pkt->mask & RXE_IETH_MASK) {
                                uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
                                uwc->ex.invalidate_rkey = ieth_rkey(pkt);
                        }

                        uwc->qp_num = qp->ibqp.qp_num;

                        if (pkt->mask & RXE_DETH_MASK)
                                uwc->src_qp = deth_sqp(pkt);

                        uwc->port_num = qp->attr.port_num;
                } else {
                        struct sk_buff *skb = PKT_TO_SKB(pkt);

                        wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
                        if (skb->protocol == htons(ETH_P_IP))
                                wc->network_hdr_type = RDMA_NETWORK_IPV4;
                        else
                                wc->network_hdr_type = RDMA_NETWORK_IPV6;

                        if (is_vlan_dev(skb->dev)) {
                                wc->wc_flags |= IB_WC_WITH_VLAN;
                                wc->vlan_id = vlan_dev_vlan_id(skb->dev);
                        }

                        if (pkt->mask & RXE_IMMDT_MASK) {
                                wc->wc_flags |= IB_WC_WITH_IMM;
                                wc->ex.imm_data = immdt_imm(pkt);
                        }

                        if (pkt->mask & RXE_IETH_MASK) {
                                struct rxe_mem *rmr;

                                wc->wc_flags |= IB_WC_WITH_INVALIDATE;
                                wc->ex.invalidate_rkey = ieth_rkey(pkt);

                                rmr = rxe_pool_get_index(&rxe->mr_pool,
                                                         wc->ex.invalidate_rkey >> 8);
                                if (unlikely(!rmr)) {
                                        pr_err("Bad rkey %#x invalidation\n",
                                               wc->ex.invalidate_rkey);
                                        return RESPST_ERROR;
                                }
                                rmr->state = RXE_MEM_STATE_FREE;
                                rxe_drop_ref(rmr);
                        }

                        if (pkt->mask & RXE_DETH_MASK)
                                wc->src_qp = deth_sqp(pkt);

                        wc->port_num = qp->attr.port_num;
                }
        }

        /* have copy for srq and reference for !srq */
        if (!qp->srq)
                advance_consumer(qp->rq.queue);

        qp->resp.wqe = NULL;

        if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
                return RESPST_ERR_CQ_OVERFLOW;

        if (qp->resp.state == QP_STATE_ERROR)
                return RESPST_CHK_RESOURCE;

        if (!pkt)
                return RESPST_DONE;
        else if (qp_type(qp) == IB_QPT_RC)
                return RESPST_ACKNOWLEDGE;
        else
                return RESPST_CLEANUP;
}

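/*
 * send_ack() emits a plain RC ACK/NAK with the given syndrome and PSN;
 * send_atomic_ack() below additionally caches the response skb in a
 * responder resource so that a duplicate atomic request can be answered
 * by resending the original result.
 */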
static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
                    u8 syndrome, u32 psn)
{
        int err;
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;

        skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
                                 0, psn, syndrome, NULL);
        if (!skb)
                return -ENOMEM;

        err = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (err)
                pr_err_ratelimited("Failed sending ack\n");

        return err;
}

static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
                           u8 syndrome)
{
        int rc;
        struct rxe_pkt_info ack_pkt;
        struct sk_buff *skb;
        struct resp_res *res;

        skb = prepare_ack_packet(qp, pkt, &ack_pkt,
                                 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, 0, pkt->psn,
                                 syndrome, NULL);
        if (!skb)
                return -ENOMEM;

        res = &qp->resp.resources[qp->resp.res_head];
        free_rd_atomic_resource(qp, res);
        rxe_advance_resp_resource(qp);

        memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt));
        memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
               sizeof(skb->cb) - sizeof(ack_pkt));

        res->type = RXE_ATOMIC_MASK;
        res->atomic.skb = skb;
        res->first_psn = ack_pkt.psn;
        res->last_psn = ack_pkt.psn;
        res->cur_psn = ack_pkt.psn;

        rc = rxe_xmit_packet(qp, &ack_pkt, skb);
        if (rc)
                pr_err_ratelimited("Failed sending ack\n");

        return rc;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
                                    struct rxe_pkt_info *pkt)
{
        if (qp_type(qp) != IB_QPT_RC)
                return RESPST_CLEANUP;

        if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
                send_ack(qp, pkt, qp->resp.aeth_syndrome, pkt->psn);
        else if (pkt->mask & RXE_ATOMIC_MASK)
                send_atomic_ack(qp, pkt, AETH_ACK_UNLIMITED);
        else if (bth_ack(pkt))
                send_ack(qp, pkt, AETH_ACK_UNLIMITED, pkt->psn);

        return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
                                struct rxe_pkt_info *pkt)
{
        struct sk_buff *skb;

        if (pkt) {
                skb = skb_dequeue(&qp->req_pkts);
                rxe_drop_ref(qp);
                kfree_skb(skb);
        }

        if (qp->resp.mr) {
                rxe_drop_ref(qp->resp.mr);
                qp->resp.mr = NULL;
        }

        return RESPST_DONE;
}

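/*
 * Look up the responder resource (read or atomic) whose PSN range
 * covers the given PSN.
 */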
static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
        int i;

        for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
                struct resp_res *res = &qp->resp.resources[i];

                if (psn_compare(psn, res->first_psn) >= 0 &&
                    psn_compare(psn, res->last_psn) <= 0) {
                        return res;
                }
        }

        return NULL;
}

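/*
 * Handle a retried request. Duplicate SENDs and WRITEs are simply
 * acked again; a duplicate READ is replayed from the saved responder
 * resource (possibly as a sub-range of the original); a duplicate
 * atomic is answered by retransmitting the cached atomic ACK.
 */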
static enum resp_states duplicate_request(struct rxe_qp *qp,
                                          struct rxe_pkt_info *pkt)
{
        enum resp_states rc;
        u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

        if (pkt->mask & RXE_SEND_MASK ||
            pkt->mask & RXE_WRITE_MASK) {
                /* SEND. Ack again and cleanup. C9-105. */
                if (bth_ack(pkt))
                        send_ack(qp, pkt, AETH_ACK_UNLIMITED, prev_psn);
                rc = RESPST_CLEANUP;
                goto out;
        } else if (pkt->mask & RXE_READ_MASK) {
                struct resp_res *res;

                res = find_resource(qp, pkt->psn);
                if (!res) {
                        /* Resource not found. Class D error. Drop the
                         * request.
                         */
                        rc = RESPST_CLEANUP;
                        goto out;
                } else {
                        /* Ensure this new request is the same as the previous
                         * one or a subset of it.
                         */
                        u64 iova = reth_va(pkt);
                        u32 resid = reth_len(pkt);

                        if (iova < res->read.va_org ||
                            resid > res->read.length ||
                            (iova + resid) > (res->read.va_org +
                                              res->read.length)) {
                                rc = RESPST_CLEANUP;
                                goto out;
                        }

                        if (reth_rkey(pkt) != res->read.rkey) {
                                rc = RESPST_CLEANUP;
                                goto out;
                        }

                        res->cur_psn = pkt->psn;
                        res->state = (pkt->psn == res->first_psn) ?
                                        rdatm_res_state_new :
                                        rdatm_res_state_replay;

                        /* Reset the resource, except length. */
                        res->read.va_org = iova;
                        res->read.va = iova;
                        res->read.resid = resid;

                        /* Replay the RDMA read reply. */
                        qp->resp.res = res;
                        rc = RESPST_READ_REPLY;
                        goto out;
                }
        } else {
                struct resp_res *res;

                /* Find the operation in our list of responder resources. */
                res = find_resource(qp, pkt->psn);
                if (res) {
                        skb_get(res->atomic.skb);
                        /* Resend the result. */
                        rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
                        if (rc) {
                                pr_err("Failed resending result. This flow is not handled - skb ignored\n");
                                rc = RESPST_CLEANUP;
                                goto out;
                        }
                }

                /* Resource not found. Class D error. Drop the request. */
                rc = RESPST_CLEANUP;
                goto out;
        }
out:
        return rc;
}

/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
                              enum ib_wc_status status)
{
        qp->resp.aeth_syndrome = syndrome;
        qp->resp.status = status;

        /* indicate that we should go through the ERROR state */
        qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
        if (qp->srq) {
                qp->resp.drop_msg = 1;
                if (qp->resp.wqe) {
                        qp->resp.status = IB_WC_REM_INV_REQ_ERR;
                        return RESPST_COMPLETE;
                } else {
                        return RESPST_CLEANUP;
                }
        } else {
                /* Class D1. This packet may be the start of a
                 * new message and could be valid. The previous
                 * message is invalid and ignored: reset the
                 * recv wr to its original state.
                 */
                qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
                qp->resp.wqe->dma.cur_sge = 0;
                qp->resp.wqe->dma.sge_offset = 0;
                qp->resp.opcode = -1;

                if (qp->resp.mr) {
                        rxe_drop_ref(qp->resp.mr);
                        qp->resp.mr = NULL;
                }

                return RESPST_CLEANUP;
        }
}

static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
        struct sk_buff *skb;

        while ((skb = skb_dequeue(&qp->req_pkts))) {
                rxe_drop_ref(qp);
                kfree_skb(skb);
        }

        if (notify)
                return;

        while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
                advance_consumer(qp->rq.queue);
}

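/*
 * Main responder state machine, run from the responder task. Each
 * iteration dispatches on the current resp_states value and the helpers
 * above return the next state, until the chain reaches a terminal
 * DONE/EXIT/ERROR state.
 */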
int rxe_responder(void *arg)
{
        struct rxe_qp *qp = (struct rxe_qp *)arg;
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        enum resp_states state;
        struct rxe_pkt_info *pkt = NULL;

        qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

        switch (qp->resp.state) {
        case QP_STATE_RESET:
                state = RESPST_RESET;
                break;

        default:
                state = RESPST_GET_REQ;
                break;
        }

        while (1) {
                pr_debug("qp#%d state = %s\n", qp_num(qp),
                         resp_state_name[state]);
                switch (state) {
                case RESPST_GET_REQ:
                        state = get_req(qp, &pkt);
                        break;
                case RESPST_CHK_PSN:
                        state = check_psn(qp, pkt);
                        break;
                case RESPST_CHK_OP_SEQ:
                        state = check_op_seq(qp, pkt);
                        break;
                case RESPST_CHK_OP_VALID:
                        state = check_op_valid(qp, pkt);
                        break;
                case RESPST_CHK_RESOURCE:
                        state = check_resource(qp, pkt);
                        break;
                case RESPST_CHK_LENGTH:
                        state = check_length(qp, pkt);
                        break;
                case RESPST_CHK_RKEY:
                        state = check_rkey(qp, pkt);
                        break;
                case RESPST_EXECUTE:
                        state = execute(qp, pkt);
                        break;
                case RESPST_COMPLETE:
                        state = do_complete(qp, pkt);
                        break;
                case RESPST_READ_REPLY:
                        state = read_reply(qp, pkt);
                        break;
                case RESPST_ACKNOWLEDGE:
                        state = acknowledge(qp, pkt);
                        break;
                case RESPST_CLEANUP:
                        state = cleanup(qp, pkt);
                        break;
                case RESPST_DUPLICATE_REQUEST:
                        state = duplicate_request(qp, pkt);
                        break;
                case RESPST_ERR_PSN_OUT_OF_SEQ:
                        /* RC only - Class B. Drop packet. */
                        send_ack(qp, pkt, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
                        state = RESPST_CLEANUP;
                        break;

                case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
                case RESPST_ERR_MISSING_OPCODE_FIRST:
                case RESPST_ERR_MISSING_OPCODE_LAST_C:
                case RESPST_ERR_UNSUPPORTED_OPCODE:
                case RESPST_ERR_MISALIGNED_ATOMIC:
                        /* RC Only - Class C. */
                        do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
                                          IB_WC_REM_INV_REQ_ERR);
                        state = RESPST_COMPLETE;
                        break;

                case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
                        state = do_class_d1e_error(qp);
                        break;

                case RESPST_ERR_RNR:
                        if (qp_type(qp) == IB_QPT_RC) {
                                rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
                                send_ack(qp, pkt, AETH_RNR_NAK |
                                         (~AETH_TYPE_MASK &
                                          qp->attr.min_rnr_timer),
                                         pkt->psn);
                        } else {
                                /* UD/UC - class D */
                                qp->resp.drop_msg = 1;
                        }
                        state = RESPST_CLEANUP;
                        break;

                case RESPST_ERR_RKEY_VIOLATION:
                        if (qp_type(qp) == IB_QPT_RC) {
                                do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
                                                  IB_WC_REM_ACCESS_ERR);
                                state = RESPST_COMPLETE;
                        } else {
                                qp->resp.drop_msg = 1;
                                if (qp->srq) {
                                        /* UC/SRQ Class D */
                                        qp->resp.status = IB_WC_REM_ACCESS_ERR;
                                        state = RESPST_COMPLETE;
                                } else {
                                        /* UC/non-SRQ Class E. */
                                        state = RESPST_CLEANUP;
                                }
                        }
                        break;

                case RESPST_ERR_LENGTH:
                        if (qp_type(qp) == IB_QPT_RC) {
                                do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
                                                  IB_WC_REM_INV_REQ_ERR);
                                state = RESPST_COMPLETE;
                        } else if (qp->srq) {
                                /* UC/UD - class E */
                                qp->resp.status = IB_WC_REM_INV_REQ_ERR;
                                state = RESPST_COMPLETE;
                        } else {
                                /* UC/UD - class D */
                                qp->resp.drop_msg = 1;
                                state = RESPST_CLEANUP;
                        }
                        break;

                case RESPST_ERR_MALFORMED_WQE:
                        do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
                                          IB_WC_LOC_QP_OP_ERR);
                        state = RESPST_COMPLETE;
                        break;

                case RESPST_ERR_CQ_OVERFLOW:
                        state = RESPST_ERROR;
                        break;

                case RESPST_DONE:
                        if (qp->resp.goto_error) {
                                state = RESPST_ERROR;
                                break;
                        }

                        goto done;

                case RESPST_EXIT:
                        if (qp->resp.goto_error) {
                                state = RESPST_ERROR;
                                break;
                        }

                        goto exit;

                case RESPST_RESET:
                        rxe_drain_req_pkts(qp, false);
                        qp->resp.wqe = NULL;
                        goto exit;

                case RESPST_ERROR:
                        qp->resp.goto_error = 0;
                        pr_warn("qp#%d moved to error state\n", qp_num(qp));