// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2016 HGST, a Western Digital Company.
 */
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>

static bool rdma_rw_force_mr;
module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
/*
 * Report whether memory registration should be used. Memory registration must
 * be used for iWarp devices because of iWARP-specific limitations. Memory
 * registration is also enabled if registering memory might yield better
 * performance than using multiple SGE entries, see rdma_rw_io_needs_mr().
 */
static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
{
        if (rdma_protocol_iwarp(dev, port_num))
                return true;
        if (dev->attrs.max_sgl_rd)
                return true;
        if (unlikely(rdma_rw_force_mr))
                return true;
        return false;
}
/*
 * Check if the device will use memory registration for this RW operation.
 * For RDMA READs we must use MRs on iWarp and can optionally use them as an
 * optimization otherwise. Additionally we have a debug option to force usage
 * of MRs to help testing this code path.
 */
static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
                enum dma_data_direction dir, int dma_nents)
{
        if (dir == DMA_FROM_DEVICE) {
                if (rdma_protocol_iwarp(dev, port_num))
                        return true;
                if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
                        return true;
        }

        if (unlikely(rdma_rw_force_mr))
                return true;
        return false;
}
static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
                                           bool pi_support)
{
        u32 max_pages;

        if (pi_support)
                max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
        else
                max_pages = dev->attrs.max_fast_reg_page_list_len;

        /* arbitrary limit to avoid allocating gigantic resources */
        return min_t(u32, max_pages, 256);
}
static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
{
        int count = 0;

        if (reg->mr->need_inval) {
                reg->inv_wr.opcode = IB_WR_LOCAL_INV;
                reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
                reg->inv_wr.next = &reg->reg_wr.wr;
                count++;
        } else {
                reg->inv_wr.next = NULL;
        }

        return count;
}
/* Caller must have zero-initialized *reg. */
static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
                struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
                u32 sg_cnt, u32 offset)
{
        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                                                    qp->integrity_en);
        u32 nents = min(sg_cnt, pages_per_mr);
        int count = 0, ret;

        reg->mr = ib_mr_pool_get(qp, &qp->rdma_mrs);
        if (!reg->mr)
                return -EAGAIN;

        count += rdma_rw_inv_key(reg);

        ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
        if (ret < 0 || ret < nents) {
                ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
                return -EINVAL;
        }

        reg->reg_wr.wr.opcode = IB_WR_REG_MR;
        reg->reg_wr.mr = reg->mr;
        reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
        if (rdma_protocol_iwarp(qp->device, port_num))
                reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
        count++;

        reg->sge.addr = reg->mr->iova;
        reg->sge.length = reg->mr->length;
        return count;
}
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
        struct rdma_rw_reg_ctx *prev = NULL;
        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                                                    qp->integrity_en);
        int i, j, ret = 0, count = 0;

        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
        ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
        if (!ctx->reg) {
                ret = -ENOMEM;
                goto out;
        }

        for (i = 0; i < ctx->nr_ops; i++) {
                struct rdma_rw_reg_ctx *reg = &ctx->reg[i];
                u32 nents = min(sg_cnt, pages_per_mr);

                ret = rdma_rw_init_one_mr(qp, port_num, reg, sg, sg_cnt,
                                offset);
                if (ret < 0)
                        goto out_free;
                count += ret;

                if (prev) {
                        if (reg->mr->need_inval)
                                prev->wr.wr.next = &reg->inv_wr;
                        else
                                prev->wr.wr.next = &reg->reg_wr.wr;
                }

                reg->reg_wr.wr.next = &reg->wr.wr;

                reg->wr.wr.sg_list = &reg->sge;
                reg->wr.wr.num_sge = 1;
                reg->wr.remote_addr = remote_addr;
                reg->wr.rkey = rkey;
                if (dir == DMA_TO_DEVICE) {
                        reg->wr.wr.opcode = IB_WR_RDMA_WRITE;
                } else if (!rdma_cap_read_inv(qp->device, port_num)) {
                        reg->wr.wr.opcode = IB_WR_RDMA_READ;
                } else {
                        reg->wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
                        reg->wr.wr.ex.invalidate_rkey = reg->mr->lkey;
                }
                count++;

                remote_addr += reg->sge.length;
                sg_cnt -= nents;
                for (j = 0; j < nents; j++)
                        sg = sg_next(sg);
                prev = reg;
                offset = 0;
        }

        if (prev)
                prev->wr.wr.next = NULL;

        ctx->type = RDMA_RW_MR;
        return count;

out_free:
        while (--i >= 0)
                ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
        kfree(ctx->reg);
out:
        return ret;
}
static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                struct scatterlist *sg, u32 sg_cnt, u32 offset,
                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
        u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
                      qp->max_read_sge;
        struct ib_sge *sge;
        u32 total_len = 0, i, j;

        ctx->nr_ops = DIV_ROUND_UP(sg_cnt, max_sge);

        ctx->map.sges = sge = kcalloc(sg_cnt, sizeof(*sge), GFP_KERNEL);
        if (!ctx->map.sges)
                goto out;

        ctx->map.wrs = kcalloc(ctx->nr_ops, sizeof(*ctx->map.wrs), GFP_KERNEL);
        if (!ctx->map.wrs)
                goto out_free_sges;

        for (i = 0; i < ctx->nr_ops; i++) {
                struct ib_rdma_wr *rdma_wr = &ctx->map.wrs[i];
                u32 nr_sge = min(sg_cnt, max_sge);

                if (dir == DMA_TO_DEVICE)
                        rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
                else
                        rdma_wr->wr.opcode = IB_WR_RDMA_READ;
                rdma_wr->remote_addr = remote_addr + total_len;
                rdma_wr->rkey = rkey;
                rdma_wr->wr.num_sge = nr_sge;
                rdma_wr->wr.sg_list = sge;

                for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
                        sge->addr = sg_dma_address(sg) + offset;
                        sge->length = sg_dma_len(sg) - offset;
                        sge->lkey = qp->pd->local_dma_lkey;

                        total_len += sge->length;
                        sge++;
                        sg_cnt--;
                        offset = 0;
                }

                rdma_wr->wr.next = i + 1 < ctx->nr_ops ?
                        &ctx->map.wrs[i + 1].wr : NULL;
        }

        ctx->type = RDMA_RW_MULTI_WR;
        return ctx->nr_ops;

out_free_sges:
        kfree(ctx->map.sges);
out:
        return -ENOMEM;
}
static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
                enum dma_data_direction dir)
{
        struct ib_rdma_wr *rdma_wr = &ctx->single.wr;

        ctx->nr_ops = 1;

        ctx->single.sge.lkey = qp->pd->local_dma_lkey;
        ctx->single.sge.addr = sg_dma_address(sg) + offset;
        ctx->single.sge.length = sg_dma_len(sg) - offset;

        memset(rdma_wr, 0, sizeof(*rdma_wr));
        if (dir == DMA_TO_DEVICE)
                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
        else
                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
        rdma_wr->wr.sg_list = &ctx->single.sge;
        rdma_wr->wr.num_sge = 1;
        rdma_wr->remote_addr = remote_addr;
        rdma_wr->rkey = rkey;

        ctx->type = RDMA_RW_SINGLE_WR;
        return 1;
}
static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
                             u32 sg_cnt, enum dma_data_direction dir)
{
        if (is_pci_p2pdma_page(sg_page(sg)))
                pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir);
        else
                ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
}
static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
                          u32 sg_cnt, enum dma_data_direction dir)
{
        if (is_pci_p2pdma_page(sg_page(sg))) {
                if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
                        return 0;
                return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
        }

        return ib_dma_map_sg(dev, sg, sg_cnt, dir);
}
/**
 * rdma_rw_ctx_init - initialize an RDMA READ/WRITE context
 * @ctx:        context to initialize
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:         scatterlist to READ/WRITE from/to
 * @sg_cnt:     number of entries in @sg
 * @sg_offset:  current byte offset into @sg
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey:       remote key to operate on
 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
                struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
        struct ib_device *dev = qp->pd->device;
        int ret;

        ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
        if (!ret)
                return -ENOMEM;
        sg_cnt = ret;

        /*
         * Skip to the S/G entry that sg_offset falls into:
         */
        for (;;) {
                u32 len = sg_dma_len(sg);

                if (sg_offset < len)
                        break;

                sg = sg_next(sg);
                sg_offset -= len;
                sg_cnt--;
        }

        ret = -EIO;
        if (WARN_ON_ONCE(sg_cnt == 0))
                goto out_unmap_sg;

        if (rdma_rw_io_needs_mr(qp->device, port_num, dir, sg_cnt)) {
                ret = rdma_rw_init_mr_wrs(ctx, qp, port_num, sg, sg_cnt,
                                sg_offset, remote_addr, rkey, dir);
        } else if (sg_cnt > 1) {
                ret = rdma_rw_init_map_wrs(ctx, qp, sg, sg_cnt, sg_offset,
                                remote_addr, rkey, dir);
        } else {
                ret = rdma_rw_init_single_wr(ctx, qp, sg, sg_offset,
                                remote_addr, rkey, dir);
        }

        if (ret < 0)
                goto out_unmap_sg;
        return ret;

out_unmap_sg:
        rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
        return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
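
/*
 * Example (editorial sketch, not part of the upstream file): a typical ULP
 * drives one rdma_rw_ctx per I/O against an already DMA-able scatterlist.
 * qp, port_num, sgl, sg_cnt, remote_addr, rkey and done_cqe stand in for
 * caller state and are assumptions of this sketch:
 *
 *      struct rdma_rw_ctx ctx;
 *      int ret;
 *
 *      ret = rdma_rw_ctx_init(&ctx, qp, port_num, sgl, sg_cnt, 0,
 *                             remote_addr, rkey, DMA_FROM_DEVICE);
 *      if (ret < 0)
 *              return ret;
 *
 *      ret = rdma_rw_ctx_post(&ctx, qp, port_num, done_cqe, NULL);
 *
 * Whether the post succeeds or fails, the ULP eventually releases the
 * context with rdma_rw_ctx_destroy() using the same sg/sg_cnt/dir.
 */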
/**
 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
 * @ctx:        context to initialize
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:         scatterlist to READ/WRITE from/to
 * @sg_cnt:     number of entries in @sg
 * @prot_sg:    scatterlist to READ/WRITE protection information from/to
 * @prot_sg_cnt: number of entries in @prot_sg
 * @sig_attrs:  signature offloading algorithms
 * @remote_addr: remote address to read/write (relative to @rkey)
 * @rkey:       remote key to operate on
 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 *
 * Returns the number of WQEs that will be needed on the send queue if
 * successful, or a negative error code.
 */
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                u8 port_num, struct scatterlist *sg, u32 sg_cnt,
                struct scatterlist *prot_sg, u32 prot_sg_cnt,
                struct ib_sig_attrs *sig_attrs,
                u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
        struct ib_device *dev = qp->pd->device;
        u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
                                                    qp->integrity_en);
        struct ib_rdma_wr *rdma_wr;
        int count = 0, ret;

        if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
                pr_err("SG count too large: sg_cnt=%d, prot_sg_cnt=%d, pages_per_mr=%d\n",
                       sg_cnt, prot_sg_cnt, pages_per_mr);
                return -EINVAL;
        }

        ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
        if (!ret)
                return -ENOMEM;
        sg_cnt = ret;

        ret = rdma_rw_map_sg(dev, prot_sg, prot_sg_cnt, dir);
        if (!ret) {
                ret = -ENOMEM;
                goto out_unmap_sg;
        }
        prot_sg_cnt = ret;

        ctx->type = RDMA_RW_SIG_MR;
        ctx->nr_ops = 1;
        ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL);
        if (!ctx->reg) {
                ret = -ENOMEM;
                goto out_unmap_prot_sg;
        }

        ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
        if (!ctx->reg->mr) {
                ret = -EAGAIN;
                goto out_free_ctx;
        }

        count += rdma_rw_inv_key(ctx->reg);

        memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));

        ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg,
                              prot_sg_cnt, NULL, SZ_4K);
        if (unlikely(ret)) {
                pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt);
                goto out_destroy_sig_mr;
        }

        ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
        ctx->reg->reg_wr.wr.wr_cqe = NULL;
        ctx->reg->reg_wr.wr.num_sge = 0;
        ctx->reg->reg_wr.wr.send_flags = 0;
        ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
        if (rdma_protocol_iwarp(qp->device, port_num))
                ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
        ctx->reg->reg_wr.mr = ctx->reg->mr;
        ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
        count++;

        ctx->reg->sge.addr = ctx->reg->mr->iova;
        ctx->reg->sge.length = ctx->reg->mr->length;
        if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
                ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;

        rdma_wr = &ctx->reg->wr;
        rdma_wr->wr.sg_list = &ctx->reg->sge;
        rdma_wr->wr.num_sge = 1;
        rdma_wr->remote_addr = remote_addr;
        rdma_wr->rkey = rkey;
        if (dir == DMA_TO_DEVICE)
                rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
        else
                rdma_wr->wr.opcode = IB_WR_RDMA_READ;
        ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
        count++;

        return count;

out_destroy_sig_mr:
        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
        kfree(ctx->reg);
out_unmap_prot_sg:
        rdma_rw_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
out_unmap_sg:
        rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
        return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
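
/*
 * Example (editorial sketch): the signature variant follows the same
 * lifecycle, with a second scatterlist carrying protection information and
 * caller-prepared signature attributes. sig_attrs, prot_sgl and prot_sg_cnt
 * are assumed caller state:
 *
 *      ret = rdma_rw_ctx_signature_init(&ctx, qp, port_num, sgl, sg_cnt,
 *                                       prot_sgl, prot_sg_cnt, sig_attrs,
 *                                       remote_addr, rkey, DMA_TO_DEVICE);
 *      ...
 *      rdma_rw_ctx_destroy_signature(&ctx, qp, port_num, sgl, sg_cnt,
 *                                    prot_sgl, prot_sg_cnt, DMA_TO_DEVICE);
 */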
/*
 * Now that we are going to post the WRs we can update the lkey and need_inval
 * state on the MRs. If we were doing this at init time, we would get double
 * or missing invalidations if a context was initialized but not actually
 * posted.
 */
static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
{
        reg->mr->need_inval = need_inval;
        ib_update_fast_reg_key(reg->mr, ib_inc_rkey(reg->mr->lkey));
        reg->reg_wr.key = reg->mr->lkey;
        reg->sge.lkey = reg->mr->lkey;
}
/**
 * rdma_rw_ctx_wrs - return chain of WRs for an RDMA READ or WRITE operation
 * @ctx:        context to operate on
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @cqe:        completion queue entry for the last WR
 * @chain_wr:   WR to append to the posted chain
 *
 * Return the WR chain for the set of RDMA READ/WRITE operations described by
 * @ctx, as well as any memory registration operations needed. If @chain_wr
 * is non-NULL the WR it points to will be appended to the chain of WRs posted.
 * If @chain_wr is not set @cqe must be set so that the caller gets a
 * completion notification.
 */
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
        struct ib_send_wr *first_wr, *last_wr;
        int i;

        switch (ctx->type) {
        case RDMA_RW_SIG_MR:
        case RDMA_RW_MR:
                for (i = 0; i < ctx->nr_ops; i++) {
                        rdma_rw_update_lkey(&ctx->reg[i],
                                ctx->reg[i].wr.wr.opcode !=
                                        IB_WR_RDMA_READ_WITH_INV);
                }

                if (ctx->reg[0].inv_wr.next)
                        first_wr = &ctx->reg[0].inv_wr;
                else
                        first_wr = &ctx->reg[0].reg_wr.wr;
                last_wr = &ctx->reg[ctx->nr_ops - 1].wr.wr;
                break;
        case RDMA_RW_MULTI_WR:
                first_wr = &ctx->map.wrs[0].wr;
                last_wr = &ctx->map.wrs[ctx->nr_ops - 1].wr;
                break;
        case RDMA_RW_SINGLE_WR:
                first_wr = &ctx->single.wr.wr;
                last_wr = &ctx->single.wr.wr;
                break;
        default:
                BUG();
        }

        if (chain_wr) {
                last_wr->next = chain_wr;
        } else {
                last_wr->wr_cqe = cqe;
                last_wr->send_flags |= IB_SEND_SIGNALED;
        }

        return first_wr;
}
EXPORT_SYMBOL(rdma_rw_ctx_wrs);
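
/*
 * Example (editorial sketch): a ULP that wants to append its own WR (e.g. a
 * send carrying the response) can chain it here instead of calling
 * rdma_rw_ctx_post(). send_wr is an assumed caller-owned work request:
 *
 *      struct ib_send_wr *first_wr;
 *
 *      first_wr = rdma_rw_ctx_wrs(&ctx, qp, port_num, NULL, &send_wr);
 *      ret = ib_post_send(qp, first_wr, NULL);
 */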
/**
 * rdma_rw_ctx_post - post an RDMA READ or RDMA WRITE operation
 * @ctx:        context to operate on
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @cqe:        completion queue entry for the last WR
 * @chain_wr:   WR to append to the posted chain
 *
 * Post the set of RDMA READ/WRITE operations described by @ctx, as well as
 * any memory registration operations needed. If @chain_wr is non-NULL the
 * WR it points to will be appended to the chain of WRs posted. If @chain_wr
 * is not set @cqe must be set so that the caller gets a completion
 * notification.
 */
*ctx
, struct ib_qp
*qp
, u8 port_num
,
566 struct ib_cqe
*cqe
, struct ib_send_wr
*chain_wr
)
568 struct ib_send_wr
*first_wr
;
570 first_wr
= rdma_rw_ctx_wrs(ctx
, qp
, port_num
, cqe
, chain_wr
);
571 return ib_post_send(qp
, first_wr
, NULL
);
573 EXPORT_SYMBOL(rdma_rw_ctx_post
);
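
/*
 * Example (editorial sketch): without a chain_wr the last WR is signaled and
 * completes through the ib_cqe, so a ULP typically embeds the cqe in its
 * per-I/O structure and recovers it in the done callback. struct my_io and
 * my_io_done() are hypothetical names:
 *
 *      static void my_io_done(struct ib_cq *cq, struct ib_wc *wc)
 *      {
 *              struct my_io *io = container_of(wc->wr_cqe, struct my_io, cqe);
 *
 *              ...handle completion, then rdma_rw_ctx_destroy()...
 *      }
 *
 *      io->cqe.done = my_io_done;
 *      rdma_rw_ctx_post(&io->ctx, qp, port_num, &io->cqe, NULL);
 */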
/**
 * rdma_rw_ctx_destroy - release all resources allocated by rdma_rw_ctx_init
 * @ctx:        context to release
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:         scatterlist that was used for the READ/WRITE
 * @sg_cnt:     number of entries in @sg
 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
                struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
{
        int i;

        switch (ctx->type) {
        case RDMA_RW_MR:
                for (i = 0; i < ctx->nr_ops; i++)
                        ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->reg[i].mr);
                kfree(ctx->reg);
                break;
        case RDMA_RW_MULTI_WR:
                kfree(ctx->map.wrs);
                kfree(ctx->map.sges);
                break;
        case RDMA_RW_SINGLE_WR:
                break;
        default:
                BUG();
                break;
        }

        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
/**
 * rdma_rw_ctx_destroy_signature - release all resources allocated by
 *      rdma_rw_ctx_signature_init
 * @ctx:        context to release
 * @qp:         queue pair to operate on
 * @port_num:   port num to which the connection is bound
 * @sg:         scatterlist that was used for the READ/WRITE
 * @sg_cnt:     number of entries in @sg
 * @prot_sg:    scatterlist that was used for the READ/WRITE of the PI
 * @prot_sg_cnt: number of entries in @prot_sg
 * @dir:        %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
 */
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
                u8 port_num, struct scatterlist *sg, u32 sg_cnt,
                struct scatterlist *prot_sg, u32 prot_sg_cnt,
                enum dma_data_direction dir)
{
        if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
                return;

        ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
        kfree(ctx->reg);

        rdma_rw_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
        rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
/**
 * rdma_rw_mr_factor - return number of MRs required for a payload
 * @device:     device handling the connection
 * @port_num:   port num to which the connection is bound
 * @maxpages:   maximum payload pages per rdma_rw_ctx
 *
 * Returns the number of MRs the device requires to move @maxpages pages of
 * payload. The returned value is used during transport creation to compute
 * max_rdma_ctxs and the size of the transport's Send and Send Completion
 * Queues.
 */
unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
                unsigned int maxpages)
{
        unsigned int mr_pages;

        if (rdma_rw_can_use_mr(device, port_num))
                mr_pages = rdma_rw_fr_page_list_len(device, false);
        else
                mr_pages = device->attrs.max_sge_rd;
        return DIV_ROUND_UP(maxpages, mr_pages);
}
EXPORT_SYMBOL(rdma_rw_mr_factor);
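
/*
 * Worked example (editorial, numbers assumed): on a device that uses memory
 * registration and where rdma_rw_fr_page_list_len() caps an MR at 256 pages,
 * a 512-page payload needs DIV_ROUND_UP(512, 256) = 2 MRs, so the transport
 * would budget two rdma_rw contexts' worth of queue space for such an I/O.
 */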
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
        u32 factor;

        WARN_ON_ONCE(attr->port_num == 0);

        /*
         * Each context needs at least one RDMA READ or WRITE WR.
         *
         * For some hardware we might need more, eventually we should ask the
         * HCA driver for a multiplier here.
         */
        factor = 1;

        /*
         * If the device needs MRs to perform RDMA READ or WRITE operations,
         * we'll need two additional WRs per context for the registration and
         * the invalidation.
         */
        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
            rdma_rw_can_use_mr(dev, attr->port_num))
                factor += 2;    /* inv + reg */

        attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;

        /*
         * But maybe we were just too high in the sky and the device doesn't
         * even support all we need, and we'll have to live with what we get..
         */
        attr->cap.max_send_wr =
                min_t(u32, attr->cap.max_send_wr, dev->attrs.max_qp_wr);
}
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
        struct ib_device *dev = qp->pd->device;
        u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
        int ret = 0;

        if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
                nr_sig_mrs = attr->cap.max_rdma_ctxs;
                nr_mrs = attr->cap.max_rdma_ctxs;
                max_num_sg = rdma_rw_fr_page_list_len(dev, true);
        } else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
                nr_mrs = attr->cap.max_rdma_ctxs;
                max_num_sg = rdma_rw_fr_page_list_len(dev, false);
        }

        if (nr_mrs) {
                ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
                                IB_MR_TYPE_MEM_REG, max_num_sg, 0);
                if (ret) {
                        pr_err("%s: failed to allocate %d MRs\n",
                               __func__, nr_mrs);
                        return ret;
                }
        }

        if (nr_sig_mrs) {
                ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
                                IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
                if (ret) {
                        pr_err("%s: failed to allocate %d SIG MRs\n",
                               __func__, nr_sig_mrs);
                        goto out_free_rdma_mrs;
                }
        }

        return 0;

out_free_rdma_mrs:
        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
        return ret;
}
void rdma_rw_cleanup_mrs(struct ib_qp *qp)
{
        ib_mr_pool_destroy(qp, &qp->sig_mrs);
        ib_mr_pool_destroy(qp, &qp->rdma_mrs);
}