// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */

#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"
/*
 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
 * work on kernel modules memory
 */
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
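/*
 * Illustration (assumption, not part of this file): the emergency page is a
 * regular kernel page allocated once at module load, roughly:
 *
 *	xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
 *	if (!xlt_emergency_page)
 *		return -ENOMEM;
 *
 * Page-allocator memory can be handed to dma_map_single(), whereas a static
 * array in the module image lives in vmalloc space and cannot.
 */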
static __be64 get_umr_enable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_KEY |
		 MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_FREE;

	return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_LEN |
		 MLX5_MKEY_MASK_PAGE_SIZE |
		 MLX5_MKEY_MASK_START_ADDR;

	return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
	u64 result;

	result = MLX5_MKEY_MASK_LR |
		 MLX5_MKEY_MASK_LW |
		 MLX5_MKEY_MASK_RR |
		 MLX5_MKEY_MASK_RW;

	if (MLX5_CAP_GEN(dev->mdev, atomic))
		result |= MLX5_MKEY_MASK_A;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;

	if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;

	return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
	u64 result;

	result = MLX5_MKEY_MASK_PD;

	return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
	if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_A &&
	    MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
		return -EPERM;

	if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
	    !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
		return -EPERM;

	return 0;
}

enum {
	MAX_UMR_WR = 128,
};
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};
	int ret;

	attr.qp_state = IB_QPS_INIT;
	attr.port_num = 1;
	ret = ib_modify_qp(qp, &attr,
			   IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		return ret;
	}

	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		return ret;
	}

	return 0;
}
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr init_attr = {};
	struct ib_cq *cq;
	struct ib_qp *qp;
	int ret = 0;

	/*
	 * UMR qp is set once, never changed until device unload.
	 * Avoid taking the mutex if initialization is already done.
	 */
	if (dev->umrc.qp)
		return 0;

	mutex_lock(&dev->umrc.init_lock);
	/* First user allocates the UMR resources. Skip if already allocated. */
	if (dev->umrc.qp)
		goto unlock;

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		ret = PTR_ERR(cq);
		goto unlock;
	}

	init_attr.send_cq = cq;
	init_attr.recv_cq = cq;
	init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	init_attr.cap.max_send_wr = MAX_UMR_WR;
	init_attr.cap.max_send_sge = 1;
	init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
	init_attr.port_num = 1;
	qp = ib_create_qp(dev->umrc.pd, &init_attr);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		ret = PTR_ERR(qp);
		goto destroy_cq;
	}

	ret = mlx5r_umr_qp_rst2rts(dev, qp);
	if (ret)
		goto destroy_qp;

	dev->umrc.cq = cq;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);
	mutex_init(&dev->umrc.lock);
	dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
	dev->umrc.qp = qp;

	mutex_unlock(&dev->umrc.init_lock);
	return 0;

destroy_qp:
	ib_destroy_qp(qp);
destroy_cq:
	ib_free_cq(cq);
unlock:
	mutex_unlock(&dev->umrc.init_lock);
	return ret;
}
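/*
 * Note on the pattern above (summary, not extra driver logic): dev->umrc.qp
 * doubles as the "already initialized" flag, so the common case is a plain
 * pointer test and only the first caller takes init_lock:
 *
 *	if (dev->umrc.qp)		// fast path, no lock
 *		return 0;
 *	mutex_lock(&dev->umrc.init_lock);
 *	if (dev->umrc.qp)		// recheck under the lock
 *		goto unlock;
 *	... create CQ and QP, publish dev->umrc.qp last ...
 */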
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
	if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
		return;

	mutex_destroy(&dev->umrc.lock);
	/* After device init, UMR cp/qp are not unset during the lifetime. */
	ib_destroy_qp(dev->umrc.qp);
	ib_free_cq(dev->umrc.cq);
}
int mlx5r_umr_init(struct mlx5_ib_dev *dev)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		return PTR_ERR(pd);
	}
	dev->umrc.pd = pd;

	mutex_init(&dev->umrc.init_lock);

	return 0;
}
void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
{
	mutex_destroy(&dev->umrc.init_lock);
	ib_dealloc_pd(dev->umrc.pd);
}
static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_qp_attr attr;
	int err;

	attr.qp_state = IB_QPS_RESET;
	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
	if (err) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		goto err;
	}

	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
	if (err)
		goto err;

	umrc->state = MLX5_UMR_STATE_ACTIVE;
	return 0;

err:
	umrc->state = MLX5_UMR_STATE_ERR;
	return err;
}
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
			       struct mlx5r_umr_wqe *wqe, bool with_data)
{
	unsigned int wqe_size =
		with_data ? sizeof(struct mlx5r_umr_wqe) :
			    sizeof(struct mlx5r_umr_wqe) -
				    sizeof(struct mlx5_wqe_data_seg);
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_qp *qp = to_mqp(ibqp);
	struct mlx5_wqe_ctrl_seg *ctrl;
	union {
		struct ib_cqe *ib_cqe;
		u64 wr_id;
	} id;
	void *cur_edge, *seg;
	unsigned long flags;
	unsigned int idx;
	int size, err;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -EIO;

	spin_lock_irqsave(&qp->sq.lock, flags);

	err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
			      cpu_to_be32(mkey), false, false);
	if (WARN_ON(err))
		goto out;

	qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;

	mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);

	id.ib_cqe = cqe;
	mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
			 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);

	mlx5r_ring_db(qp, 1, ctrl);

out:
	spin_unlock_irqrestore(&qp->sq.lock, flags);

	return err;
}
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}
static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
	context->cqe.done = mlx5r_umr_done;
	init_completion(&context->done);
}
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
				    struct mlx5r_umr_wqe *wqe, bool with_data)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5r_umr_context umr_context;
	int err;

	err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
	if (WARN_ON(err))
		return err;

	mlx5r_umr_init_context(&umr_context);

	down(&umrc->sem);
	while (true) {
		mutex_lock(&umrc->lock);
		if (umrc->state == MLX5_UMR_STATE_ERR) {
			mutex_unlock(&umrc->lock);
			err = -EFAULT;
			break;
		}

		if (umrc->state == MLX5_UMR_STATE_RECOVER) {
			mutex_unlock(&umrc->lock);
			usleep_range(3000, 5000);
			continue;
		}

		err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
					  with_data);
		mutex_unlock(&umrc->lock);
		if (err) {
			mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
				     err);
			break;
		}

		wait_for_completion(&umr_context.done);

		if (umr_context.status == IB_WC_SUCCESS)
			break;

		if (umr_context.status == IB_WC_WR_FLUSH_ERR)
			continue;

		WARN_ON_ONCE(1);
		mlx5_ib_warn(dev,
			"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
			umr_context.status, mkey);
		mutex_lock(&umrc->lock);
		err = mlx5r_umr_recover(dev);
		mutex_unlock(&umrc->lock);
		if (err)
			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
				     err);
		err = -EFAULT;
		break;
	}
	up(&umrc->sem);
	return err;
}
/**
 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
 * @mr: The MR to fence
 *
 * Upon return the NIC will not be doing any DMA to the pages under the MR,
 * and any DMA in progress will be completed. Failure of this function
 * indicates the HW has failed catastrophically.
 */
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};

	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}
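/*
 * Usage sketch (assumption about the callers in mr.c, not code in this file):
 * the revoke is issued before an mkey's pages are released or the mkey is
 * returned to the mkey cache, so no stale DMA can target memory the caller is
 * about to free:
 *
 *	if (mlx5r_umr_revoke_mr(mr))
 *		return;		// HW is wedged; do not free the pages
 *	ib_umem_release(mr->umem);
 */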
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
				       struct mlx5_mkey_seg *seg,
				       unsigned int access_flags)
{
	bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
		       (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
			pcie_relaxed_ordering_enabled(dev->mdev->pdev));

	MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, seg, lr, 1);
	MLX5_SET(mkc, seg, relaxed_ordering_write,
		 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
	MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
}
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
			      int access_flags)
{
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct mlx5r_umr_wqe wqe = {};
	int err;

	wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
	wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
	wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;

	mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
	MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
		 mlx5_mkey_variant(mr->mmkey.key));

	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
	if (err)
		return err;

	mr->access_flags = access_flags;
	return 0;
}
#define MLX5_MAX_UMR_CHUNK \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
 * Allocate a temporary buffer to hold the per-page information to transfer to
 * HW. For efficiency this should be as large as it can be, but buffer
 * allocation failure is not allowed, so try smaller sizes.
 */
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
	const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
	size_t size;
	void *res = NULL;

	static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);

	/*
	 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
	 * allocation can't trigger any kind of reclaim.
	 */
	might_sleep();

	gfp_mask |= __GFP_ZERO | __GFP_NORETRY;

	/*
	 * If the system already has a suitable high order page then just use
	 * that, but don't try hard to create one. This max is about 1M, so a
	 * free x86 huge page will satisfy it.
	 */
	size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
		     MLX5_MAX_UMR_CHUNK);
	*nents = size / ent_size;
	res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
				       get_order(size));
	if (res)
		return res;

	if (size > MLX5_SPARE_UMR_CHUNK) {
		size = MLX5_SPARE_UMR_CHUNK;
		*nents = size / ent_size;
		res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
					       get_order(size));
		if (res)
			return res;
	}

	*nents = PAGE_SIZE / ent_size;
	res = (void *)__get_free_page(gfp_mask);
	if (res)
		return res;

	mutex_lock(&xlt_emergency_page_mutex);
	memset(xlt_emergency_page, 0, PAGE_SIZE);
	return xlt_emergency_page;
}
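/*
 * Worked example of the fallback ladder above (assuming the usual
 * MLX5_MAX_UMR_SHIFT = 16, MLX5_UMR_FLEX_ALIGNMENT = 64, 4KiB pages and
 * 8-byte struct mlx5_mtt entries; the headers are authoritative):
 *
 *   1st try: MLX5_MAX_UMR_CHUNK = (1 << 20) - 64 bytes (~1MiB), ~131k MTTs
 *   2nd try: MLX5_SPARE_UMR_CHUNK = 0x10000 (64KiB), 8192 MTTs
 *   3rd try: one page (4KiB), 512 MTTs
 *   last resort: the preallocated xlt_emergency_page, serialized by
 *		  xlt_emergency_page_mutex
 *
 * The caller sees the reduced size via *nents and simply posts the XLT in
 * more, smaller chunks.
 */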
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
	if (xlt == xlt_emergency_page) {
		mutex_unlock(&xlt_emergency_page_mutex);
		return;
	}

	free_pages((unsigned long)xlt, get_order(length));
}
static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
				     struct ib_sge *sg)
{
	struct device *ddev = &dev->mdev->pdev->dev;

	dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
	mlx5r_umr_free_xlt(xlt, sg->length);
}
/*
 * Create an XLT buffer ready for submission.
 */
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
				  size_t nents, size_t ent_size,
				  unsigned int flags)
{
	struct device *ddev = &dev->mdev->pdev->dev;
	dma_addr_t dma;
	void *xlt;

	xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
				  flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
								   GFP_KERNEL);
	sg->length = nents * ent_size;
	dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		mlx5r_umr_free_xlt(xlt, sg->length);
		return NULL;
	}
	sg->addr = dma;
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	return xlt;
}
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				  unsigned int flags, struct ib_sge *sg)
{
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		/* fail if free */
		ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
	else
		/* fail if not free */
		ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
	ctrl_seg->xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}
static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
					      struct mlx5_mkey_seg *mkey_seg,
					      struct mlx5_ib_mr *mr,
					      unsigned int page_shift)
{
	mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
	MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
	MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
	MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
	MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
	MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
	MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}
static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
				  struct ib_sge *sg)
{
	data_seg->byte_count = cpu_to_be32(sg->length);
	data_seg->lkey = cpu_to_be32(sg->lkey);
	data_seg->addr = cpu_to_be64(sg->addr);
}
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
				    u64 offset)
{
	u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);

	ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
	ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
	ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
				       struct mlx5r_umr_wqe *wqe,
				       struct mlx5_ib_mr *mr, struct ib_sge *sg,
				       unsigned int flags)
{
	bool update_pd_access, update_translation;

	if (flags & MLX5_IB_UPD_XLT_ENABLE)
		wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();

	update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
			   flags & MLX5_IB_UPD_XLT_PD ||
			   flags & MLX5_IB_UPD_XLT_ACCESS;

	if (update_pd_access) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
		wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
	}

	update_translation =
		flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;

	if (update_translation) {
		wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
		if (!mr->ibmr.length)
			MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
	}

	wqe->ctrl_seg.xlt_octowords =
		cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
	wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
static int
_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
	size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	struct mlx5r_umr_wqe wqe = {};
	struct ib_block_iter biter;
	struct mlx5_ksm *cur_ksm;
	struct mlx5_mtt *cur_mtt;
	size_t orig_sg_length;
	size_t final_size;
	void *curr_entry;
	struct ib_sge sg;
	void *entry;
	u64 offset = 0;
	int err = 0;

	entry = mlx5r_umr_create_xlt(dev, &sg,
				     ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
				     ent_size, flags);
	if (!entry)
		return -ENOMEM;

	orig_sg_length = sg.length;
	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
					  mr->page_shift);
	if (dd) {
		/* Use the data direct internal kernel PD */
		MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
		cur_ksm = entry;
	} else {
		cur_mtt = entry;
	}

	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	curr_entry = entry;
	rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
		if (curr_entry == entry + sg.length) {
			dma_sync_single_for_device(ddev, sg.addr, sg.length,
						   DMA_TO_DEVICE);

			err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
						       true);
			if (err)
				goto err;
			dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
						DMA_TO_DEVICE);
			offset += sg.length;
			mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);

			if (dd)
				cur_ksm = entry;
			else
				cur_mtt = entry;
		}

		if (dd) {
			cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
			cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
			cur_ksm++;
			curr_entry = cur_ksm;
		} else {
			cur_mtt->ptag =
				cpu_to_be64(rdma_block_iter_dma_address(&biter) |
					    MLX5_IB_MTT_PRESENT);
			if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
				cur_mtt->ptag = 0;
			cur_mtt++;
			curr_entry = cur_mtt;
		}
	}

	final_size = curr_entry - entry;
	sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
	memset(curr_entry, 0, sg.length - final_size);
	mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);

	dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
	err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);

err:
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
	return err;
}
int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	/* No invalidation flow is expected */
	if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, true);
}
/*
 * Send the DMA list to the HW for a normal MR using UMR.
 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
 * flag may be used.
 */
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
	if (WARN_ON(mr->umem->is_odp))
		return -EINVAL;

	return _mlx5r_umr_update_mr_pas(mr, flags, false);
}
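/*
 * Illustrative note (assumption about the dmabuf callers, which live outside
 * this file): a dma-buf move_notify handler is expected to zap the
 * translation with mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP) while the
 * exporter relocates the buffer, and to repost the real addresses with a
 * later mlx5r_umr_update_mr_pas() call once the buffer is mapped again.
 */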
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
			 int page_shift, int flags)
{
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			       ? sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
	struct mlx5_ib_dev *dev = mr_to_mdev(mr);
	struct device *ddev = &dev->mdev->pdev->dev;
	const int page_mask = page_align - 1;
	struct mlx5r_umr_wqe wqe = {};
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t size_to_map = 0;
	size_t orig_sg_length;
	size_t pages_iter;
	struct ib_sge sg;
	int err = 0;
	void *xlt;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	if (WARN_ON(!mr->umem->is_odp))
		return -EINVAL;

	/* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}
	pages_to_map = ALIGN(npages, page_align);

	xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
	if (!xlt)
		return -ENOMEM;

	pages_iter = sg.length / desc_size;
	orig_sg_length = sg.length;

	if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
		struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
		size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

		pages_to_map = min_t(size_t, pages_to_map, max_pages);
	}

	mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
	mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
	mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
					DMA_TO_DEVICE);
		mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		dma_sync_single_for_device(ddev, sg.addr, sg.length,
					   DMA_TO_DEVICE);
		sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map)
			mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
		mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);

		err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
	}
	sg.length = orig_sg_length;
	mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
	return err;
}
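/*
 * Worked example for the alignment above (assuming sizeof(struct mlx5_mtt) ==
 * 8 and MLX5_UMR_FLEX_ALIGNMENT == 64, so page_align == 8 and page_mask ==
 * 0x7): a request for idx = 13, npages = 3 becomes idx = 8, npages = 8. The
 * update is widened to cover the whole 64-byte XLT chunk containing the
 * requested entries, because the device only accepts xlt_offset and length
 * in those units.
 */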