// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>

#include <asm-generic/barrier.h>
#include <asm/bitops.h>

#include <rdma/ib_cm.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif
static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);

struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
rpcrdma_alloc_wq(void)
{
	struct workqueue_struct *recv_wq;

	recv_wq = alloc_workqueue("xprtrdma_receive",
				  WQ_MEM_RECLAIM | WQ_HIGHPRI,

	rpcrdma_receive_wq = recv_wq;
rpcrdma_destroy_wq(void)
{
	struct workqueue_struct *wq;

	if (rpcrdma_receive_wq) {
		wq = rpcrdma_receive_wq;
		rpcrdma_receive_wq = NULL;
		destroy_workqueue(wq);
/**
 * rpcrdma_disconnect_worker - Force a disconnect
 * @work: endpoint to be disconnected
 *
 * Provider callbacks can possibly run in an IRQ context. This function
 * is invoked in a worker thread to guarantee that disconnect wake-up
 * calls are always done in process context.
 */
rpcrdma_disconnect_worker(struct work_struct *work)
{
	struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
					     rep_disconnect_worker.work);
	struct rpcrdma_xprt *r_xprt =
		container_of(ep, struct rpcrdma_xprt, rx_ep);

	xprt_force_disconnect(&r_xprt->rx_xprt);
/**
 * rpcrdma_qp_event_handler - Handle one QP event (error notification)
 * @event: details of the event
 * @context: ep that owns QP where event occurred
 *
 * Called from the RDMA provider (device driver) possibly in an interrupt
 */
rpcrdma_qp_event_handler(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;
	struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
						   rx_ep);

	trace_xprtrdma_qp_event(r_xprt, event);
	pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
	       ib_event_msg(event->event), event->device->name,
	       rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));

	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		schedule_delayed_work(&ep->rep_disconnect_worker, 0);
		wake_up_all(&ep->rep_connect_wait);
/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue (ignored)
 */
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_sendctx *sc =
		container_of(cqe, struct rpcrdma_sendctx, sc_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_send(sc, wc);
	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);

	rpcrdma_sendctx_put_locked(sc);
/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue (ignored)
 */
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);

	/* WARNING: Only wr_id and status are reliable at this point */
	trace_xprtrdma_wc_receive(wc);
	if (wc->status != IB_WC_SUCCESS)

	/* status == SUCCESS means all fields in wc are trustworthy */
	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;
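	/* Note: the DMA sync below makes the just-received RPC/RDMA header
	 * visible to the CPU before the reply handler parses it.
	 */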
	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
				   rdmab_addr(rep->rr_rdmabuf),
				   wc->byte_len, DMA_FROM_DEVICE);

	rpcrdma_reply_handler(rep);

	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0);
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
			       struct rdma_conn_param *param)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		r_xprt->rx_ia.ri_implicit_roundup = true;
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);

	if (rsize < cdata->inline_rsize)
		cdata->inline_rsize = rsize;
	if (wsize < cdata->inline_wsize)
		cdata->inline_wsize = wsize;
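	/* Example: if the peer advertises 1024-byte buffers while the local
	 * defaults are larger, both inline thresholds drop to 1024 so that
	 * neither peer sends more inline data than the other can receive.
	 */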
	dprintk("RPC: %s: max send %u, max recv %u\n",
		__func__, cdata->inline_wsize, cdata->inline_rsize);
	rpcrdma_set_max_header_sizes(r_xprt);
/**
 * rpcrdma_cm_event_handler - Handle RDMA CM events
 * @id: rdma_cm_id on which an event has occurred
 * @event: details of the event
 *
 * Called with @id's mutex held. Returns 1 if caller should
 * destroy @id, otherwise 0.
 */
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *r_xprt = id->context;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	trace_xprtrdma_cm_event(r_xprt, event);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		complete(&ia->ri_done);
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EPROTO;
		complete(&ia->ri_done);
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		complete(&ia->ri_done);
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
		pr_info("rpcrdma: removing device %s for %s:%s\n",
			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
		ep->rep_connected = -ENODEV;
		xprt_force_disconnect(xprt);
		wait_for_completion(&ia->ri_remove_done);

		ia->ri_device = NULL;
		/* Return 1 to ensure the core destroys the id. */
	case RDMA_CM_EVENT_ESTABLISHED:
		++xprt->connect_cookie;
		ep->rep_connected = 1;
		rpcrdma_update_connect_private(r_xprt, &event->param.conn);
		wake_up_all(&ep->rep_connect_wait);
	case RDMA_CM_EVENT_CONNECT_ERROR:
		ep->rep_connected = -ENOTCONN;
	case RDMA_CM_EVENT_UNREACHABLE:
		ep->rep_connected = -ENETUNREACH;
	case RDMA_CM_EVENT_REJECTED:
		dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
			rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
			rdma_reject_msg(id, event->status));
		ep->rep_connected = -ECONNREFUSED;
		if (event->status == IB_CM_REJ_STALE_CONN)
			ep->rep_connected = -EAGAIN;
	case RDMA_CM_EVENT_DISCONNECTED:
		++xprt->connect_cookie;
		ep->rep_connected = -ECONNABORTED;
		xprt_force_disconnect(xprt);
		wake_up_all(&ep->rep_connect_wait);

	dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
		rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
		ia->ri_device->name, ia->ri_ops->ro_displayname,
		rdma_event_msg(event->event));
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	struct rdma_cm_id *id;

	trace_xprtrdma_conn_start(xprt);

	init_completion(&ia->ri_done);
	init_completion(&ia->ri_remove_done);

	id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
			    xprt, RDMA_PS_TCP, IB_QPT_RC);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL,
			       (struct sockaddr *)&xprt->rx_xprt.addr,
			       RDMA_RESOLVE_TIMEOUT);
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
		trace_xprtrdma_conn_tout(xprt);
	rc = ia->ri_async_rc;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
		trace_xprtrdma_conn_tout(xprt);
	rc = ia->ri_async_rc;
/*
 * Exported functions.
 */

/**
 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
 * @xprt: transport with IA to (re)initialize
 *
 * Returns 0 on success, negative errno if an appropriate
 * Interface Adapter could not be found and opened.
 */
rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);

	ia->ri_device = ia->ri_id->device;

	ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);

	switch (xprt_rdma_memreg_strategy) {
		if (frwr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
	case RPCRDMA_MTHCAFMR:
		if (fmr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_fmr_memreg_ops;
	pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
	       ia->ri_device->name, xprt_rdma_memreg_strategy);

	rpcrdma_ia_close(ia);
/**
 * rpcrdma_ia_remove - Handle device driver unload
 * @ia: interface adapter being removed
 *
 * Divest transport H/W resources associated with this adapter,
 * but allow it to be restored later.
 */
rpcrdma_ia_remove(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpcrdma_rep *rep;

	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	/* This is similar to rpcrdma_ep_destroy, but:
	 * - Don't cancel the connect worker.
	 * - Don't call rpcrdma_ep_disconnect, which waits
	 *   for another conn upcall, which will deadlock.
	 * - rdma_disconnect is unneeded, the underlying
	 *   connection is already gone.
	 */
	ib_drain_qp(ia->ri_id->qp);
	rdma_destroy_qp(ia->ri_id);
	ia->ri_id->qp = NULL;

	ib_free_cq(ep->rep_attr.recv_cq);
	ep->rep_attr.recv_cq = NULL;
	ib_free_cq(ep->rep_attr.send_cq);
	ep->rep_attr.send_cq = NULL;

	/* The ULP is responsible for ensuring all DMA
	 * mappings and MRs are gone.
	 */
	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
	rpcrdma_mrs_destroy(buf);
	ib_dealloc_pd(ia->ri_pd);

	/* Allow waiters to continue */
	complete(&ia->ri_remove_done);

	trace_xprtrdma_remove(r_xprt);
/**
 * rpcrdma_ia_close - Clean up/close an IA.
 * @ia: interface adapter to close
 */
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
	ia->ri_device = NULL;

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		ib_dealloc_pd(ia->ri_pd);
/*
 * Create unconnected endpoint.
 */
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
{
	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
	struct ib_cq *sendcq, *recvcq;
	unsigned int max_sge;

	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
	ia->ri_max_send_sges = max_sge;

	rc = ia->ri_ops->ro_open(ia, ep, cdata);

	ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_sge = max_sge;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);
	/* set trigger for requesting send completion */
	ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH,
				   cdata->max_requests >> 2);
	ep->rep_send_count = ep->rep_send_batch;
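	/* Example: with 128 credits the batch is min(RPCRDMA_MAX_SEND_BATCH, 32);
	 * rpcrdma_ep_post() requests a signaled Send completion only when
	 * rep_send_count has counted down to zero or Send resources must be
	 * released promptly.
	 */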
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
			  rpcrdma_disconnect_worker);

	sendcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_send_wr + 1,
			     1, IB_POLL_WORKQUEUE);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",

	recvcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_recv_wr + 1,
			     0, IB_POLL_WORKQUEUE);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */
	memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
	ep->rep_remote_cma.private_data = pmsg;
	ep->rep_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	ep->rep_remote_cma.responder_resources =
		min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->rep_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;
/*
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	cancel_delayed_work_sync(&ep->rep_disconnect_worker);

	if (ia->ri_id && ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;

	if (ep->rep_attr.recv_cq)
		ib_free_cq(ep->rep_attr.recv_cq);
	if (ep->rep_attr.send_cq)
		ib_free_cq(ep->rep_attr.send_cq);
/* Re-establish a connection after a device removal event.
 * Unlike a normal reconnection, a fresh PD and a new set
 * of MRs and buffers is needed.
 */
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	trace_xprtrdma_reinsert(r_xprt);

	if (rpcrdma_ia_open(r_xprt))

	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);

	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);

	rpcrdma_mrs_create(r_xprt);

	rpcrdma_ep_destroy(ep, ia);
	rpcrdma_ia_close(ia);
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
		     struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;

	trace_xprtrdma_reconnect(r_xprt);

	rpcrdma_ep_disconnect(ep, ia);

	id = rpcrdma_create_id(r_xprt, ia);

	/* As long as the new ID points to the same device as the
	 * old ID, we can reuse the transport's existing PD and all
	 * previously allocated MRs. Also, the same device means
	 * the transport's previous DMA mappings are still valid.
	 *
	 * This is a sanity check only. There should be no way these
	 * point to two different devices here.
	 */
	if (ia->ri_device != id->device) {
		pr_err("rpcrdma: can't reconnect on different device!\n");

	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		dprintk("RPC: %s: rdma_create_qp returned %d\n",

	/* Atomically replace the transport's ID and QP. */
	rdma_destroy_qp(old);
	rdma_destroy_id(old);
/*
 * Connect unconnected endpoint.
 */
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	switch (ep->rep_connected) {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);

	ep->rep_connected = 0;
	xprt_clear_connected(xprt);

	rpcrdma_post_recvs(r_xprt, true);

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
		dprintk("RPC: %s: rdma_connect() failed with %i\n",

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
	if (ep->rep_connected <= 0) {
		if (ep->rep_connected == -EAGAIN)
		rc = ep->rep_connected;

	dprintk("RPC: %s: connected\n", __func__);

	ep->rep_connected = rc;
/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	rc = rdma_disconnect(ia->ri_id);
	/* returns without wait if not connected */
	wait_event_interruptible(ep->rep_connect_wait,
				 ep->rep_connected != 1);

	ep->rep_connected = rc;
	trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
					       rx_ep));

	ib_drain_qp(ia->ri_id->qp);
/* Fixed-size circular FIFO queue. This implementation is wait-free and
 * lock-free.
 *
 * Consumer is the code path that posts Sends. This path dequeues a
 * sendctx for use by a Send operation. Multiple consumer threads
 * are serialized by the RPC transport lock, which allows only one
 * ->send_request call at a time.
 *
 * Producer is the code path that handles Send completions. This path
 * enqueues a sendctx that has been completed. Multiple producer
 * threads are serialized by the ib_poll_cq() function.
 */
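/* Illustrative sketch (not part of the transport) of the indexing scheme
 * used by the functions below, assuming one consumer and one producer on
 * a ring of rb_sc_last + 1 slots:
 *
 *	// consumer (->send_request path):
 *	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
 *	if (next_head == READ_ONCE(buf->rb_sc_tail))
 *		back off;			// ring looks empty
 *	sc = buf->rb_sc_ctxs[next_head];
 *	buf->rb_sc_head = next_head;		// published by lock release
 *
 *	// producer (Send completion path):
 *	smp_store_release(&buf->rb_sc_tail, next_tail);
 *
 * The producer's release store pairs with the consumer's READ_ONCE(),
 * so a sendctx is never reused before its unmap has completed.
 */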
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
 * queue activity, and ib_drain_qp has flushed all remaining Send
 * requests.
 */
static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf)
{
	for (i = 0; i <= buf->rb_sc_last; i++)
		kfree(buf->rb_sc_ctxs[i]);
	kfree(buf->rb_sc_ctxs);
static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia)
{
	struct rpcrdma_sendctx *sc;

	sc = kzalloc(sizeof(*sc) +
		     ia->ri_max_send_sges * sizeof(struct ib_sge),

	sc->sc_wr.wr_cqe = &sc->sc_cqe;
	sc->sc_wr.sg_list = sc->sc_sges;
	sc->sc_wr.opcode = IB_WR_SEND;
	sc->sc_cqe.done = rpcrdma_wc_send;
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;

	/* Maximum number of concurrent outstanding Send WRs. Capping
	 * the circular queue size stops Send Queue overflow by causing
	 * the ->send_request call to fail temporarily before too many
	 * Sends are posted.
	 */
	i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS;
	dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i);
	buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
	if (!buf->rb_sc_ctxs)

	buf->rb_sc_last = i - 1;
	for (i = 0; i <= buf->rb_sc_last; i++) {
		sc = rpcrdma_sendctx_create(&r_xprt->rx_ia);

		sc->sc_xprt = r_xprt;
		buf->rb_sc_ctxs[i] = sc;

	rpcrdma_sendctxs_destroy(buf);
/* The sendctx queue is not guaranteed to have a size that is a
 * power of two, thus the helpers in circ_buf.h cannot be used.
 * The other option is to use modulus (%), which can be expensive.
 */
static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
					  unsigned long item)
{
	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
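/* E.g. rpcrdma_sendctx_next(buf, buf->rb_sc_last) wraps around to slot 0;
 * the compare-and-reset above is cheaper than a modulus on a queue whose
 * size is not a power of two.
 */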
/**
 * rpcrdma_sendctx_get_locked - Acquire a send context
 * @buf: transport buffers from which to acquire an unused context
 *
 * Returns pointer to a free send completion context; or NULL if
 * the queue is empty.
 *
 * Usage: Called to acquire an SGE array before preparing a Send WR.
 *
 * The caller serializes calls to this function (per rpcrdma_buffer),
 * and provides an effective memory barrier that flushes the new value
 * of rb_sc_head.
 */
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt;
	struct rpcrdma_sendctx *sc;
	unsigned long next_head;

	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);

	if (next_head == READ_ONCE(buf->rb_sc_tail))

	/* ORDER: item must be accessed _before_ head is updated */
	sc = buf->rb_sc_ctxs[next_head];

	/* Releasing the lock in the caller acts as a memory
	 * barrier that flushes rb_sc_head.
	 */
	buf->rb_sc_head = next_head;

	/* The queue is "empty" if there have not been enough Send
	 * completions recently. This is a sign the Send Queue is
	 * backing up. Cause the caller to pause and try again.
	 */
	set_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags);
	r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf);
	r_xprt->rx_stats.empty_sendctx_q++;
/**
 * rpcrdma_sendctx_put_locked - Release a send context
 * @sc: send context to release
 *
 * Usage: Called from Send completion to return a sendctx
 *
 * The caller serializes calls to this function (per rpcrdma_buffer).
 */
rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf;
	unsigned long next_tail;

	/* Unmap SGEs of previously completed but unsignaled
	 * Sends by walking up the queue until @sc is found.
	 */
	next_tail = buf->rb_sc_tail;
		next_tail = rpcrdma_sendctx_next(buf, next_tail);

		/* ORDER: item must be accessed _before_ tail is updated */
		rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]);

	} while (buf->rb_sc_ctxs[next_tail] != sc);

	/* Paired with READ_ONCE */
	smp_store_release(&buf->rb_sc_tail, next_tail);
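	/* The release store above also ensures the unmaps performed in the
	 * loop are visible before rpcrdma_sendctx_get_locked() observes the
	 * new rb_sc_tail via READ_ONCE() and hands these sendctxs out again.
	 */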
	if (test_and_clear_bit(RPCRDMA_BUF_F_EMPTY_SCQ, &buf->rb_flags)) {
		smp_mb__after_atomic();
		xprt_write_space(&sc->sc_xprt->rx_xprt);
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;

	for (count = 0; count < ia->ri_max_segs; count++) {
		struct rpcrdma_mr *mr;

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);

		rc = ia->ri_ops->ro_init_mr(ia, mr);

		mr->mr_xprt = r_xprt;

		list_add(&mr->mr_list, &free);
		list_add(&mr->mr_all, &all);

	spin_lock(&buf->rb_mrlock);
	list_splice(&free, &buf->rb_mrs);
	list_splice(&all, &buf->rb_all);
	r_xprt->rx_stats.mrs_allocated += count;
	spin_unlock(&buf->rb_mrlock);
	trace_xprtrdma_createmrs(r_xprt, count);

	xprt_write_space(&r_xprt->rx_xprt);
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker.work);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_mrs_create(r_xprt);
struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_regbuf *rb;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
		return ERR_PTR(-ENOMEM);

	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
				  DMA_TO_DEVICE, GFP_KERNEL);
		return ERR_PTR(-ENOMEM);

	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
	req->rl_buffer = buffer;
	INIT_LIST_HEAD(&req->rl_registered);

	spin_lock(&buffer->rb_reqslock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_reqslock);
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt, bool temp)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;

	rep = kzalloc(sizeof(*rep), GFP_KERNEL);

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
					       DMA_FROM_DEVICE, GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);

	xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base,
		     rdmab_length(rep->rr_rdmabuf));

	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion);
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	rep->rr_temp = temp;

	spin_lock(&buf->rb_lock);
	list_add(&rep->rr_list, &buf->rb_recv_bufs);
	spin_unlock(&buf->rb_lock);

	dprintk("RPC: %s: reply buffer %d alloc failed\n",
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

	buf->rb_max_requests = r_xprt->rx_data.max_requests;
	buf->rb_bc_srv_max_requests = 0;
	spin_lock_init(&buf->rb_mrlock);
	spin_lock_init(&buf->rb_lock);
	INIT_LIST_HEAD(&buf->rb_mrs);
	INIT_LIST_HEAD(&buf->rb_all);
	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
			  rpcrdma_mr_refresh_worker);

	rpcrdma_mrs_create(r_xprt);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	spin_lock_init(&buf->rb_reqslock);
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_create_req(r_xprt);
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
		list_add(&req->rl_list, &buf->rb_send_bufs);

	buf->rb_credits = 1;
	buf->rb_posted_receives = 0;
	INIT_LIST_HEAD(&buf->rb_recv_bufs);

	rc = rpcrdma_sendctxs_create(r_xprt);

	rpcrdma_buffer_destroy(buf);
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
	rpcrdma_free_regbuf(rep->rr_rdmabuf);

rpcrdma_destroy_req(struct rpcrdma_req *req)
{
	rpcrdma_free_regbuf(req->rl_recvbuf);
	rpcrdma_free_regbuf(req->rl_sendbuf);
	rpcrdma_free_regbuf(req->rl_rdmabuf);
rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mr *mr;

	spin_lock(&buf->rb_mrlock);
	while (!list_empty(&buf->rb_all)) {
		mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
		list_del(&mr->mr_all);

		spin_unlock(&buf->rb_mrlock);

		/* Ensure MW is not on any rl_registered list */
		if (!list_empty(&mr->mr_list))
			list_del(&mr->mr_list);

		ia->ri_ops->ro_release_mr(mr);

		spin_lock(&buf->rb_mrlock);
	spin_unlock(&buf->rb_mrlock);
	r_xprt->rx_stats.mrs_allocated = 0;

	dprintk("RPC: %s: released %u MRs\n", __func__, count);
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	rpcrdma_sendctxs_destroy(buf);

	while (!list_empty(&buf->rb_recv_bufs)) {
		struct rpcrdma_rep *rep;

		rep = list_first_entry(&buf->rb_recv_bufs,
				       struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		rpcrdma_destroy_rep(rep);

	spin_lock(&buf->rb_reqslock);
	while (!list_empty(&buf->rb_allreqs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_allreqs,
				       struct rpcrdma_req, rl_all);
		list_del(&req->rl_all);

		spin_unlock(&buf->rb_reqslock);
		rpcrdma_destroy_req(req);
		spin_lock(&buf->rb_reqslock);
	spin_unlock(&buf->rb_reqslock);

	rpcrdma_mrs_destroy(buf);
/**
 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
 * @r_xprt: controlling transport
 *
 * Returns an initialized rpcrdma_mr or NULL if no free
 * rpcrdma_mr objects are available.
 */
rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr = NULL;

	spin_lock(&buf->rb_mrlock);
	if (!list_empty(&buf->rb_mrs))
		mr = rpcrdma_mr_pop(&buf->rb_mrs);
	spin_unlock(&buf->rb_mrlock);

	trace_xprtrdma_nomrs(r_xprt);
	if (r_xprt->rx_ep.rep_connected != -ENODEV)
		schedule_delayed_work(&buf->rb_refresh_worker, 0);
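	/* If the underlying device has been removed (-ENODEV), scheduling a
	 * refresh is pointless: no MRs can be allocated until the transport
	 * is re-established on a working device.
	 */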
	/* Allow the reply handler and refresh worker to run */

__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
{
	spin_lock(&buf->rb_mrlock);
	rpcrdma_mr_push(mr, &buf->rb_mrs);
	spin_unlock(&buf->rb_mrlock);
/**
 * rpcrdma_mr_put - Release an rpcrdma_mr object
 * @mr: object to release
 */
rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
	__rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);

/**
 * rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
 * @mr: object to release
 */
rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

	trace_xprtrdma_mr_unmap(mr);
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mr->mr_sg, mr->mr_nents, mr->mr_dir);
	__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
/**
 * rpcrdma_buffer_get - Get a request buffer
 * @buffers: Buffer pool from which to obtain a buffer
 *
 * Returns a fresh rpcrdma_req, or NULL if none are available.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	req = list_first_entry_or_null(&buffers->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
		list_del_init(&req->rl_list);
	spin_unlock(&buffers->rb_lock);
/**
 * rpcrdma_buffer_put - Put request/reply buffers back into pool
 * @req: object to return
 */
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_rep *rep = req->rl_reply;

	req->rl_reply = NULL;

	spin_lock(&buffers->rb_lock);
	list_add(&req->rl_list, &buffers->rb_send_bufs);
		if (!rep->rr_temp) {
			list_add(&rep->rr_list, &buffers->rb_recv_bufs);
	spin_unlock(&buffers->rb_lock);
		rpcrdma_destroy_rep(rep);
/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;

	if (!rep->rr_temp) {
		spin_lock(&buffers->rb_lock);
		list_add(&rep->rr_list, &buffers->rb_recv_bufs);
		spin_unlock(&buffers->rb_lock);

		rpcrdma_destroy_rep(rep);
/**
 * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
 * @size: size of buffer to be allocated, in bytes
 * @direction: direction of data movement
 *
 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
 * can be persistently DMA-mapped for I/O.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via ro_map.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
		     gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb) + size, flags);
		return ERR_PTR(-ENOMEM);

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;
/**
 * __rpcrdma_dma_map_regbuf - DMA-map a regbuf
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be mapped
 */
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	struct ib_device *device = ia->ri_device;

	if (rb->rg_direction == DMA_NONE)

	rb->rg_iov.addr = ib_dma_map_single(device,
					    (void *)rb->rg_base,

	if (ib_dma_mapping_error(device, rdmab_addr(rb)))

	rb->rg_device = device;
	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rpcrdma_regbuf_is_mapped(rb))

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
			    rdmab_length(rb), rb->rg_direction);
	rb->rg_device = NULL;

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @rb: regbuf to be deregistered and freed
 */
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
	rpcrdma_dma_unmap_regbuf(rb);
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;

	if (!ep->rep_send_count ||
	    test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
		send_wr->send_flags |= IB_SEND_SIGNALED;
		ep->rep_send_count = ep->rep_send_batch;
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		--ep->rep_send_count;
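	/* Net effect: roughly one Send in every rep_send_batch is signaled,
	 * which limits Send completion overhead; a Send that carries
	 * resources needing prompt release is always signaled.
	 */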
	rc = ia->ri_ops->ro_send(ia, req);
	trace_xprtrdma_post_send(req, rc);
/**
 * rpcrdma_post_recvs - Maybe post some Receive buffers
 * @r_xprt: controlling transport
 * @temp: when true, allocate temp rpcrdma_rep objects
 */
rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct ib_recv_wr *wr, *bad_wr;
	int needed, count, rc;

	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
	if (buf->rb_posted_receives > needed)

	needed -= buf->rb_posted_receives;
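	/* Example: with 64 credits and no backchannel requests the goal is
	 * 64 posted Receives; if 60 are already posted, only 4 more are
	 * added by this call.
	 */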
		struct rpcrdma_regbuf *rb;
		struct rpcrdma_rep *rep;

		spin_lock(&buf->rb_lock);
		rep = list_first_entry_or_null(&buf->rb_recv_bufs,
					       struct rpcrdma_rep, rr_list);
			list_del(&rep->rr_list);
		spin_unlock(&buf->rb_lock);
			if (rpcrdma_create_rep(r_xprt, temp))

		rb = rep->rr_rdmabuf;
		if (!rpcrdma_regbuf_is_mapped(rb)) {
			if (!__rpcrdma_dma_map_regbuf(&r_xprt->rx_ia, rb)) {
				rpcrdma_recv_buffer_put(rep);

		trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
		rep->rr_recv_wr.next = wr;
		wr = &rep->rr_recv_wr;

	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
			  (const struct ib_recv_wr **)&bad_wr);
		for (wr = bad_wr; wr; wr = wr->next) {
			struct rpcrdma_rep *rep;

			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
			rpcrdma_recv_buffer_put(rep);

	buf->rb_posted_receives += count;
	trace_xprtrdma_post_recvs(r_xprt, count, rc);