Merge tag 'trace-printf-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace...
[drm/drm-misc.git] / drivers / infiniband / hw / mlx5 / umr.c
blob887fd6fa3ba930f57296f6629b4b517fae061788
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
4 #include <rdma/ib_umem_odp.h>
5 #include "mlx5_ib.h"
6 #include "umr.h"
7 #include "wr.h"
/*
 * Serializes use of xlt_emergency_page: mlx5r_umr_alloc_xlt() takes this
 * mutex when it hands the page out and mlx5r_umr_free_xlt() releases it.
 */
static DEFINE_MUTEX(xlt_emergency_page_mutex);

/*
 * Last-resort XLT buffer, used when every page allocation attempt fails.
 * This is a pointer rather than a static array because dma_map_single()
 * doesn't work on kernel module memory.
 * NOTE(review): the page is not allocated in this file - presumably set up
 * by the module init code; confirm.
 */
void *xlt_emergency_page;
16 static __be64 get_umr_enable_mr_mask(void)
18 u64 result;
20 result = MLX5_MKEY_MASK_KEY |
21 MLX5_MKEY_MASK_FREE;
23 return cpu_to_be64(result);
26 static __be64 get_umr_disable_mr_mask(void)
28 u64 result;
30 result = MLX5_MKEY_MASK_FREE;
32 return cpu_to_be64(result);
35 static __be64 get_umr_update_translation_mask(void)
37 u64 result;
39 result = MLX5_MKEY_MASK_LEN |
40 MLX5_MKEY_MASK_PAGE_SIZE |
41 MLX5_MKEY_MASK_START_ADDR;
43 return cpu_to_be64(result);
46 static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
48 u64 result;
50 result = MLX5_MKEY_MASK_LR |
51 MLX5_MKEY_MASK_LW |
52 MLX5_MKEY_MASK_RR |
53 MLX5_MKEY_MASK_RW;
55 if (MLX5_CAP_GEN(dev->mdev, atomic))
56 result |= MLX5_MKEY_MASK_A;
58 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
59 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
61 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
62 result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
64 return cpu_to_be64(result);
67 static __be64 get_umr_update_pd_mask(void)
69 u64 result;
71 result = MLX5_MKEY_MASK_PD;
73 return cpu_to_be64(result);
76 static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
78 if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
79 MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
80 return -EPERM;
82 if (mask & MLX5_MKEY_MASK_A &&
83 MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
84 return -EPERM;
86 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
87 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
88 return -EPERM;
90 if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
91 !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
92 return -EPERM;
94 return 0;
/*
 * Send queue depth requested for the UMR QP; also the initial count of the
 * semaphore that limits concurrent UMR posters.
 */
enum { MAX_UMR_WR = 128 };
101 static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
103 struct ib_qp_attr attr = {};
104 int ret;
106 attr.qp_state = IB_QPS_INIT;
107 attr.port_num = 1;
108 ret = ib_modify_qp(qp, &attr,
109 IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
110 if (ret) {
111 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
112 return ret;
115 memset(&attr, 0, sizeof(attr));
116 attr.qp_state = IB_QPS_RTR;
118 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
119 if (ret) {
120 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
121 return ret;
124 memset(&attr, 0, sizeof(attr));
125 attr.qp_state = IB_QPS_RTS;
126 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
127 if (ret) {
128 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
129 return ret;
132 return 0;
135 int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
137 struct ib_qp_init_attr init_attr = {};
138 struct ib_cq *cq;
139 struct ib_qp *qp;
140 int ret = 0;
144 * UMR qp is set once, never changed until device unload.
145 * Avoid taking the mutex if initialization is already done.
147 if (dev->umrc.qp)
148 return 0;
150 mutex_lock(&dev->umrc.init_lock);
151 /* First user allocates the UMR resources. Skip if already allocated. */
152 if (dev->umrc.qp)
153 goto unlock;
155 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
156 if (IS_ERR(cq)) {
157 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
158 ret = PTR_ERR(cq);
159 goto unlock;
162 init_attr.send_cq = cq;
163 init_attr.recv_cq = cq;
164 init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
165 init_attr.cap.max_send_wr = MAX_UMR_WR;
166 init_attr.cap.max_send_sge = 1;
167 init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
168 init_attr.port_num = 1;
169 qp = ib_create_qp(dev->umrc.pd, &init_attr);
170 if (IS_ERR(qp)) {
171 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
172 ret = PTR_ERR(qp);
173 goto destroy_cq;
176 ret = mlx5r_umr_qp_rst2rts(dev, qp);
177 if (ret)
178 goto destroy_qp;
180 dev->umrc.cq = cq;
182 sema_init(&dev->umrc.sem, MAX_UMR_WR);
183 mutex_init(&dev->umrc.lock);
184 dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
185 dev->umrc.qp = qp;
187 mutex_unlock(&dev->umrc.init_lock);
188 return 0;
190 destroy_qp:
191 ib_destroy_qp(qp);
192 destroy_cq:
193 ib_free_cq(cq);
194 unlock:
195 mutex_unlock(&dev->umrc.init_lock);
196 return ret;
199 void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
201 if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
202 return;
203 mutex_destroy(&dev->umrc.lock);
204 /* After device init, UMR cp/qp are not unset during the lifetime. */
205 ib_destroy_qp(dev->umrc.qp);
206 ib_free_cq(dev->umrc.cq);
209 int mlx5r_umr_init(struct mlx5_ib_dev *dev)
211 struct ib_pd *pd;
213 pd = ib_alloc_pd(&dev->ib_dev, 0);
214 if (IS_ERR(pd)) {
215 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
216 return PTR_ERR(pd);
218 dev->umrc.pd = pd;
220 mutex_init(&dev->umrc.init_lock);
222 return 0;
225 void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
227 if (!dev->umrc.pd)
228 return;
230 mutex_destroy(&dev->umrc.init_lock);
231 ib_dealloc_pd(dev->umrc.pd);
234 static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
236 struct umr_common *umrc = &dev->umrc;
237 struct ib_qp_attr attr;
238 int err;
240 attr.qp_state = IB_QPS_RESET;
241 err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
242 if (err) {
243 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
244 goto err;
247 err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
248 if (err)
249 goto err;
251 umrc->state = MLX5_UMR_STATE_ACTIVE;
252 return 0;
254 err:
255 umrc->state = MLX5_UMR_STATE_ERR;
256 return err;
259 static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
260 struct mlx5r_umr_wqe *wqe, bool with_data)
262 unsigned int wqe_size =
263 with_data ? sizeof(struct mlx5r_umr_wqe) :
264 sizeof(struct mlx5r_umr_wqe) -
265 sizeof(struct mlx5_wqe_data_seg);
266 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
267 struct mlx5_core_dev *mdev = dev->mdev;
268 struct mlx5_ib_qp *qp = to_mqp(ibqp);
269 struct mlx5_wqe_ctrl_seg *ctrl;
270 union {
271 struct ib_cqe *ib_cqe;
272 u64 wr_id;
273 } id;
274 void *cur_edge, *seg;
275 unsigned long flags;
276 unsigned int idx;
277 int size, err;
279 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
280 return -EIO;
282 spin_lock_irqsave(&qp->sq.lock, flags);
284 err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
285 cpu_to_be32(mkey), false, false);
286 if (WARN_ON(err))
287 goto out;
289 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
291 mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
293 id.ib_cqe = cqe;
294 mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
295 MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
297 mlx5r_ring_db(qp, 1, ctrl);
299 out:
300 spin_unlock_irqrestore(&qp->sq.lock, flags);
302 return err;
305 static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
307 struct mlx5_ib_umr_context *context =
308 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
310 context->status = wc->status;
311 complete(&context->done);
314 static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
316 context->cqe.done = mlx5r_umr_done;
317 init_completion(&context->done);
320 static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
321 struct mlx5r_umr_wqe *wqe, bool with_data)
323 struct umr_common *umrc = &dev->umrc;
324 struct mlx5r_umr_context umr_context;
325 int err;
327 err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
328 if (WARN_ON(err))
329 return err;
331 mlx5r_umr_init_context(&umr_context);
333 down(&umrc->sem);
334 while (true) {
335 mutex_lock(&umrc->lock);
336 if (umrc->state == MLX5_UMR_STATE_ERR) {
337 mutex_unlock(&umrc->lock);
338 err = -EFAULT;
339 break;
342 if (umrc->state == MLX5_UMR_STATE_RECOVER) {
343 mutex_unlock(&umrc->lock);
344 usleep_range(3000, 5000);
345 continue;
348 err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
349 with_data);
350 mutex_unlock(&umrc->lock);
351 if (err) {
352 mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
353 err);
354 break;
357 wait_for_completion(&umr_context.done);
359 if (umr_context.status == IB_WC_SUCCESS)
360 break;
362 if (umr_context.status == IB_WC_WR_FLUSH_ERR)
363 continue;
365 WARN_ON_ONCE(1);
366 mlx5_ib_warn(dev,
367 "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
368 umr_context.status, mkey);
369 mutex_lock(&umrc->lock);
370 err = mlx5r_umr_recover(dev);
371 mutex_unlock(&umrc->lock);
372 if (err)
373 mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
374 err);
375 err = -EFAULT;
376 break;
378 up(&umrc->sem);
379 return err;
383 * mlx5r_umr_revoke_mr - Fence all DMA on the MR
384 * @mr: The MR to fence
386 * Upon return the NIC will not be doing any DMA to the pages under the MR,
387 * and any DMA in progress will be completed. Failure of this function
388 * indicates the HW has failed catastrophically.
390 int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
392 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
393 struct mlx5r_umr_wqe wqe = {};
395 if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
396 return 0;
398 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
399 wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
400 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
402 MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
403 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
404 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
405 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
406 mlx5_mkey_variant(mr->mmkey.key));
408 return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
411 static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
412 struct mlx5_mkey_seg *seg,
413 unsigned int access_flags)
415 bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
416 (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
417 pcie_relaxed_ordering_enabled(dev->mdev->pdev));
419 MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
420 MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
421 MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
422 MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
423 MLX5_SET(mkc, seg, lr, 1);
424 MLX5_SET(mkc, seg, relaxed_ordering_write,
425 !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
426 MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
429 int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
430 int access_flags)
432 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
433 struct mlx5r_umr_wqe wqe = {};
434 int err;
436 wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
437 wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
438 wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
439 wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
441 mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
442 MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
443 MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
444 MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
445 mlx5_mkey_variant(mr->mmkey.key));
447 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
448 if (err)
449 return err;
451 mr->access_flags = access_flags;
452 return 0;
/*
 * Largest XLT buffer size tried first by mlx5r_umr_alloc_xlt(): derived
 * from MLX5_MAX_UMR_SHIFT, less one MLX5_UMR_FLEX_ALIGNMENT unit.
 */
#define MLX5_MAX_UMR_CHUNK                                                     \
	((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
/* Fallback XLT buffer size (64 KiB) when the large allocation fails. */
#define MLX5_SPARE_UMR_CHUNK 0x10000
460 * Allocate a temporary buffer to hold the per-page information to transfer to
461 * HW. For efficiency this should be as large as it can be, but buffer
462 * allocation failure is not allowed, so try smaller sizes.
464 static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
466 const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
467 size_t size;
468 void *res = NULL;
470 static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);
473 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
474 * allocation can't trigger any kind of reclaim.
476 might_sleep();
478 gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
481 * If the system already has a suitable high order page then just use
482 * that, but don't try hard to create one. This max is about 1M, so a
483 * free x86 huge page will satisfy it.
485 size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
486 MLX5_MAX_UMR_CHUNK);
487 *nents = size / ent_size;
488 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
489 get_order(size));
490 if (res)
491 return res;
493 if (size > MLX5_SPARE_UMR_CHUNK) {
494 size = MLX5_SPARE_UMR_CHUNK;
495 *nents = size / ent_size;
496 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
497 get_order(size));
498 if (res)
499 return res;
502 *nents = PAGE_SIZE / ent_size;
503 res = (void *)__get_free_page(gfp_mask);
504 if (res)
505 return res;
507 mutex_lock(&xlt_emergency_page_mutex);
508 memset(xlt_emergency_page, 0, PAGE_SIZE);
509 return xlt_emergency_page;
512 static void mlx5r_umr_free_xlt(void *xlt, size_t length)
514 if (xlt == xlt_emergency_page) {
515 mutex_unlock(&xlt_emergency_page_mutex);
516 return;
519 free_pages((unsigned long)xlt, get_order(length));
522 static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
523 struct ib_sge *sg)
525 struct device *ddev = &dev->mdev->pdev->dev;
527 dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
528 mlx5r_umr_free_xlt(xlt, sg->length);
532 * Create an XLT buffer ready for submission.
534 static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
535 size_t nents, size_t ent_size,
536 unsigned int flags)
538 struct device *ddev = &dev->mdev->pdev->dev;
539 dma_addr_t dma;
540 void *xlt;
542 xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
543 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
544 GFP_KERNEL);
545 sg->length = nents * ent_size;
546 dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
547 if (dma_mapping_error(ddev, dma)) {
548 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
549 mlx5r_umr_free_xlt(xlt, sg->length);
550 return NULL;
552 sg->addr = dma;
553 sg->lkey = dev->umrc.pd->local_dma_lkey;
555 return xlt;
558 static void
559 mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
560 unsigned int flags, struct ib_sge *sg)
562 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
563 /* fail if free */
564 ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
565 else
566 /* fail if not free */
567 ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
568 ctrl_seg->xlt_octowords =
569 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
572 static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
573 struct mlx5_mkey_seg *mkey_seg,
574 struct mlx5_ib_mr *mr,
575 unsigned int page_shift)
577 mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
578 MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
579 MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
580 MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
581 MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
582 MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
583 MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
586 static void
587 mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
588 struct ib_sge *sg)
590 data_seg->byte_count = cpu_to_be32(sg->length);
591 data_seg->lkey = cpu_to_be32(sg->lkey);
592 data_seg->addr = cpu_to_be64(sg->addr);
595 static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
596 u64 offset)
598 u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
600 ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
601 ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
602 ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
605 static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
606 struct mlx5r_umr_wqe *wqe,
607 struct mlx5_ib_mr *mr, struct ib_sge *sg,
608 unsigned int flags)
610 bool update_pd_access, update_translation;
612 if (flags & MLX5_IB_UPD_XLT_ENABLE)
613 wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
615 update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
616 flags & MLX5_IB_UPD_XLT_PD ||
617 flags & MLX5_IB_UPD_XLT_ACCESS;
619 if (update_pd_access) {
620 wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
621 wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
624 update_translation =
625 flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
627 if (update_translation) {
628 wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
629 if (!mr->ibmr.length)
630 MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
633 wqe->ctrl_seg.xlt_octowords =
634 cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
635 wqe->data_seg.byte_count = cpu_to_be32(sg->length);
638 static int
639 _mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
641 size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
642 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
643 struct device *ddev = &dev->mdev->pdev->dev;
644 struct mlx5r_umr_wqe wqe = {};
645 struct ib_block_iter biter;
646 struct mlx5_ksm *cur_ksm;
647 struct mlx5_mtt *cur_mtt;
648 size_t orig_sg_length;
649 size_t final_size;
650 void *curr_entry;
651 struct ib_sge sg;
652 void *entry;
653 u64 offset = 0;
654 int err = 0;
656 entry = mlx5r_umr_create_xlt(dev, &sg,
657 ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
658 ent_size, flags);
659 if (!entry)
660 return -ENOMEM;
662 orig_sg_length = sg.length;
663 mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
664 mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
665 mr->page_shift);
666 if (dd) {
667 /* Use the data direct internal kernel PD */
668 MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
669 cur_ksm = entry;
670 } else {
671 cur_mtt = entry;
674 mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
676 curr_entry = entry;
677 rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
678 if (curr_entry == entry + sg.length) {
679 dma_sync_single_for_device(ddev, sg.addr, sg.length,
680 DMA_TO_DEVICE);
682 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
683 true);
684 if (err)
685 goto err;
686 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
687 DMA_TO_DEVICE);
688 offset += sg.length;
689 mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
690 if (dd)
691 cur_ksm = entry;
692 else
693 cur_mtt = entry;
696 if (dd) {
697 cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
698 cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
699 cur_ksm++;
700 curr_entry = cur_ksm;
701 } else {
702 cur_mtt->ptag =
703 cpu_to_be64(rdma_block_iter_dma_address(&biter) |
704 MLX5_IB_MTT_PRESENT);
705 if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
706 cur_mtt->ptag = 0;
707 cur_mtt++;
708 curr_entry = cur_mtt;
712 final_size = curr_entry - entry;
713 sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
714 memset(curr_entry, 0, sg.length - final_size);
715 mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
717 dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
718 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
720 err:
721 sg.length = orig_sg_length;
722 mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
723 return err;
726 int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
728 /* No invalidation flow is expected */
729 if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
730 return -EINVAL;
732 return _mlx5r_umr_update_mr_pas(mr, flags, true);
736 * Send the DMA list to the HW for a normal MR using UMR.
737 * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
738 * flag may be used.
740 int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
742 if (WARN_ON(mr->umem->is_odp))
743 return -EINVAL;
745 return _mlx5r_umr_update_mr_pas(mr, flags, false);
748 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
750 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
753 int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
754 int page_shift, int flags)
756 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
757 ? sizeof(struct mlx5_klm)
758 : sizeof(struct mlx5_mtt);
759 const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
760 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
761 struct device *ddev = &dev->mdev->pdev->dev;
762 const int page_mask = page_align - 1;
763 struct mlx5r_umr_wqe wqe = {};
764 size_t pages_mapped = 0;
765 size_t pages_to_map = 0;
766 size_t size_to_map = 0;
767 size_t orig_sg_length;
768 size_t pages_iter;
769 struct ib_sge sg;
770 int err = 0;
771 void *xlt;
773 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
774 !umr_can_use_indirect_mkey(dev))
775 return -EPERM;
777 if (WARN_ON(!mr->umem->is_odp))
778 return -EINVAL;
780 /* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
781 * so we need to align the offset and length accordingly
783 if (idx & page_mask) {
784 npages += idx & page_mask;
785 idx &= ~page_mask;
787 pages_to_map = ALIGN(npages, page_align);
789 xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
790 if (!xlt)
791 return -ENOMEM;
793 pages_iter = sg.length / desc_size;
794 orig_sg_length = sg.length;
796 if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
797 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
798 size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
800 pages_to_map = min_t(size_t, pages_to_map, max_pages);
803 mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
804 mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
805 mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
807 for (pages_mapped = 0;
808 pages_mapped < pages_to_map && !err;
809 pages_mapped += pages_iter, idx += pages_iter) {
810 npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
811 size_to_map = npages * desc_size;
812 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
813 DMA_TO_DEVICE);
814 mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
815 dma_sync_single_for_device(ddev, sg.addr, sg.length,
816 DMA_TO_DEVICE);
817 sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
819 if (pages_mapped + pages_iter >= pages_to_map)
820 mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
821 mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
822 err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
824 sg.length = orig_sg_length;
825 mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
826 return err;