drivers/infiniband/hw/mlx5/mr.c
1 /*
2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
34 #include <linux/kref.h>
35 #include <linux/random.h>
36 #include <linux/debugfs.h>
37 #include <linux/export.h>
38 #include <linux/delay.h>
39 #include <rdma/ib_umem.h>
40 #include <rdma/ib_umem_odp.h>
41 #include <rdma/ib_verbs.h>
42 #include "mlx5_ib.h"
45 * We can't use an array for xlt_emergency_page because dma_map_single doesn't
46 * work on kernel module memory
48 void *xlt_emergency_page;
49 static DEFINE_MUTEX(xlt_emergency_page_mutex);
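/*
 * Illustrative note (an assumption, not original driver text): dma_map_single()
 * needs an address in the kernel's linear mapping, while a module's static data
 * can live in vmalloc space, so the emergency XLT buffer cannot simply be a
 * static array in this module. Instead a real page is allocated once at module
 * init (outside this file) and handed out under xlt_emergency_page_mutex as a
 * last-resort buffer.
 */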
51 enum {
52 MAX_PENDING_REG_MR = 8,
55 #define MLX5_UMR_ALIGN 2048
57 static void
58 create_mkey_callback(int status, struct mlx5_async_work *context);
59 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
60 u64 iova, int access_flags,
61 unsigned int page_size, bool populate);
63 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
64 struct ib_pd *pd)
66 struct mlx5_ib_dev *dev = to_mdev(pd->device);
68 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
69 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
70 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
71 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
72 MLX5_SET(mkc, mkc, lr, 1);
74 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
75 MLX5_SET(mkc, mkc, relaxed_ordering_write,
76 !!(acc & IB_ACCESS_RELAXED_ORDERING));
77 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read))
78 MLX5_SET(mkc, mkc, relaxed_ordering_read,
79 !!(acc & IB_ACCESS_RELAXED_ORDERING));
81 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
82 MLX5_SET(mkc, mkc, qpn, 0xffffff);
83 MLX5_SET64(mkc, mkc, start_addr, start_addr);
86 static void
87 assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
88 u32 *in)
90 u8 key = atomic_inc_return(&dev->mkey_var);
91 void *mkc;
93 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
94 MLX5_SET(mkc, mkc, mkey_7_0, key);
95 mkey->key = key;
98 static int
99 mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
100 u32 *in, int inlen)
102 assign_mkey_variant(dev, mkey, in);
103 return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
106 static int
107 mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
108 struct mlx5_core_mkey *mkey,
109 struct mlx5_async_ctx *async_ctx,
110 u32 *in, int inlen, u32 *out, int outlen,
111 struct mlx5_async_work *context)
113 MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
114 assign_mkey_variant(dev, mkey, in);
115 return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
116 create_mkey_callback, context);
119 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
120 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
121 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
122 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
124 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
126 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
129 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
131 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
133 return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
136 static void create_mkey_callback(int status, struct mlx5_async_work *context)
138 struct mlx5_ib_mr *mr =
139 container_of(context, struct mlx5_ib_mr, cb_work);
140 struct mlx5_cache_ent *ent = mr->cache_ent;
141 struct mlx5_ib_dev *dev = ent->dev;
142 unsigned long flags;
144 if (status) {
145 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
146 kfree(mr);
147 spin_lock_irqsave(&ent->lock, flags);
148 ent->pending--;
149 WRITE_ONCE(dev->fill_delay, 1);
150 spin_unlock_irqrestore(&ent->lock, flags);
151 mod_timer(&dev->delay_timer, jiffies + HZ);
152 return;
155 mr->mmkey.type = MLX5_MKEY_MR;
156 mr->mmkey.key |= mlx5_idx_to_mkey(
157 MLX5_GET(create_mkey_out, mr->out, mkey_index));
159 WRITE_ONCE(dev->cache.last_add, jiffies);
161 spin_lock_irqsave(&ent->lock, flags);
162 list_add_tail(&mr->list, &ent->head);
163 ent->available_mrs++;
164 ent->total_mrs++;
165 /* If we are doing fill_to_high_water then keep going. */
166 queue_adjust_cache_locked(ent);
167 ent->pending--;
168 spin_unlock_irqrestore(&ent->lock, flags);
171 static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc)
173 struct mlx5_ib_mr *mr;
175 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
176 if (!mr)
177 return NULL;
178 mr->cache_ent = ent;
180 set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
181 MLX5_SET(mkc, mkc, free, 1);
182 MLX5_SET(mkc, mkc, umr_en, 1);
183 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
184 MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7);
186 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
187 MLX5_SET(mkc, mkc, log_page_size, ent->page);
188 return mr;
191 /* Asynchronously schedule new MRs to be populated in the cache. */
192 static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
194 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
195 struct mlx5_ib_mr *mr;
196 void *mkc;
197 u32 *in;
198 int err = 0;
199 int i;
201 in = kzalloc(inlen, GFP_KERNEL);
202 if (!in)
203 return -ENOMEM;
205 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
206 for (i = 0; i < num; i++) {
207 mr = alloc_cache_mr(ent, mkc);
208 if (!mr) {
209 err = -ENOMEM;
210 break;
212 spin_lock_irq(&ent->lock);
213 if (ent->pending >= MAX_PENDING_REG_MR) {
214 err = -EAGAIN;
215 spin_unlock_irq(&ent->lock);
216 kfree(mr);
217 break;
219 ent->pending++;
220 spin_unlock_irq(&ent->lock);
221 err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
222 &ent->dev->async_ctx, in, inlen,
223 mr->out, sizeof(mr->out),
224 &mr->cb_work);
225 if (err) {
226 spin_lock_irq(&ent->lock);
227 ent->pending--;
228 spin_unlock_irq(&ent->lock);
229 mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
230 kfree(mr);
231 break;
235 kfree(in);
236 return err;
239 /* Synchronously create a MR in the cache */
240 static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
242 size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
243 struct mlx5_ib_mr *mr;
244 void *mkc;
245 u32 *in;
246 int err;
248 in = kzalloc(inlen, GFP_KERNEL);
249 if (!in)
250 return ERR_PTR(-ENOMEM);
251 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
253 mr = alloc_cache_mr(ent, mkc);
254 if (!mr) {
255 err = -ENOMEM;
256 goto free_in;
259 err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
260 if (err)
261 goto free_mr;
263 mr->mmkey.type = MLX5_MKEY_MR;
264 WRITE_ONCE(ent->dev->cache.last_add, jiffies);
265 spin_lock_irq(&ent->lock);
266 ent->total_mrs++;
267 spin_unlock_irq(&ent->lock);
268 kfree(in);
269 return mr;
270 free_mr:
271 kfree(mr);
272 free_in:
273 kfree(in);
274 return ERR_PTR(err);
277 static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
279 struct mlx5_ib_mr *mr;
281 lockdep_assert_held(&ent->lock);
282 if (list_empty(&ent->head))
283 return;
284 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
285 list_del(&mr->list);
286 ent->available_mrs--;
287 ent->total_mrs--;
288 spin_unlock_irq(&ent->lock);
289 mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
290 kfree(mr);
291 spin_lock_irq(&ent->lock);
294 static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
295 bool limit_fill)
297 int err;
299 lockdep_assert_held(&ent->lock);
301 while (true) {
302 if (limit_fill)
303 target = ent->limit * 2;
304 if (target == ent->available_mrs + ent->pending)
305 return 0;
306 if (target > ent->available_mrs + ent->pending) {
307 u32 todo = target - (ent->available_mrs + ent->pending);
309 spin_unlock_irq(&ent->lock);
310 err = add_keys(ent, todo);
311 if (err == -EAGAIN)
312 usleep_range(3000, 5000);
313 spin_lock_irq(&ent->lock);
314 if (err) {
315 if (err != -EAGAIN)
316 return err;
317 } else
318 return 0;
319 } else {
320 remove_cache_mr_locked(ent);
325 static ssize_t size_write(struct file *filp, const char __user *buf,
326 size_t count, loff_t *pos)
328 struct mlx5_cache_ent *ent = filp->private_data;
329 u32 target;
330 int err;
332 err = kstrtou32_from_user(buf, count, 0, &target);
333 if (err)
334 return err;
337 * Target is the new value of total_mrs the user requests, however we
338 * cannot free MRs that are in use. Compute the target value for
339 * available_mrs.
341 spin_lock_irq(&ent->lock);
342 if (target < ent->total_mrs - ent->available_mrs) {
343 err = -EINVAL;
344 goto err_unlock;
346 target = target - (ent->total_mrs - ent->available_mrs);
347 if (target < ent->limit || target > ent->limit*2) {
348 err = -EINVAL;
349 goto err_unlock;
351 err = resize_available_mrs(ent, target, false);
352 if (err)
353 goto err_unlock;
354 spin_unlock_irq(&ent->lock);
356 return count;
358 err_unlock:
359 spin_unlock_irq(&ent->lock);
360 return err;
363 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
364 loff_t *pos)
366 struct mlx5_cache_ent *ent = filp->private_data;
367 char lbuf[20];
368 int err;
370 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs);
371 if (err < 0)
372 return err;
374 return simple_read_from_buffer(buf, count, pos, lbuf, err);
377 static const struct file_operations size_fops = {
378 .owner = THIS_MODULE,
379 .open = simple_open,
380 .write = size_write,
381 .read = size_read,
384 static ssize_t limit_write(struct file *filp, const char __user *buf,
385 size_t count, loff_t *pos)
387 struct mlx5_cache_ent *ent = filp->private_data;
388 u32 var;
389 int err;
391 err = kstrtou32_from_user(buf, count, 0, &var);
392 if (err)
393 return err;
396 * Upon set we immediately fill the cache to high water mark implied by
397 * the limit.
399 spin_lock_irq(&ent->lock);
400 ent->limit = var;
401 err = resize_available_mrs(ent, 0, true);
402 spin_unlock_irq(&ent->lock);
403 if (err)
404 return err;
405 return count;
408 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
409 loff_t *pos)
411 struct mlx5_cache_ent *ent = filp->private_data;
412 char lbuf[20];
413 int err;
415 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
416 if (err < 0)
417 return err;
419 return simple_read_from_buffer(buf, count, pos, lbuf, err);
422 static const struct file_operations limit_fops = {
423 .owner = THIS_MODULE,
424 .open = simple_open,
425 .write = limit_write,
426 .read = limit_read,
429 static bool someone_adding(struct mlx5_mr_cache *cache)
431 unsigned int i;
433 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
434 struct mlx5_cache_ent *ent = &cache->ent[i];
435 bool ret;
437 spin_lock_irq(&ent->lock);
438 ret = ent->available_mrs < ent->limit;
439 spin_unlock_irq(&ent->lock);
440 if (ret)
441 return true;
443 return false;
447 * Check if the bucket is outside the high/low water mark and schedule an async
448 * update. The cache refill has hysteresis, once the low water mark is hit it is
449 * refilled up to the high mark.
451 static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
453 lockdep_assert_held(&ent->lock);
455 if (ent->disabled || READ_ONCE(ent->dev->fill_delay))
456 return;
457 if (ent->available_mrs < ent->limit) {
458 ent->fill_to_high_water = true;
459 queue_work(ent->dev->cache.wq, &ent->work);
460 } else if (ent->fill_to_high_water &&
461 ent->available_mrs + ent->pending < 2 * ent->limit) {
463 * Once we start populating due to hitting a low water mark
464 * continue until we pass the high water mark.
466 queue_work(ent->dev->cache.wq, &ent->work);
467 } else if (ent->available_mrs == 2 * ent->limit) {
468 ent->fill_to_high_water = false;
469 } else if (ent->available_mrs > 2 * ent->limit) {
470 /* Queue deletion of excess entries */
471 ent->fill_to_high_water = false;
472 if (ent->pending)
473 queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
474 msecs_to_jiffies(1000));
475 else
476 queue_work(ent->dev->cache.wq, &ent->work);
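/*
 * Illustrative walk-through of the thresholds above (not original driver
 * text): with ent->limit == 8 and nothing pending,
 *
 *   available_mrs < 8        -> set fill_to_high_water and queue a refill
 *   8 <= available_mrs < 16  -> keep refilling only while fill_to_high_water
 *   available_mrs == 16      -> clear fill_to_high_water, stop refilling
 *   available_mrs > 16       -> queue removal of the excess entries
 *
 * A bucket that drops below its limit is therefore topped up to 2 * limit and
 * is only trimmed once it overshoots that mark, which is the hysteresis
 * described in the comment above.
 */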
480 static void __cache_work_func(struct mlx5_cache_ent *ent)
482 struct mlx5_ib_dev *dev = ent->dev;
483 struct mlx5_mr_cache *cache = &dev->cache;
484 int err;
486 spin_lock_irq(&ent->lock);
487 if (ent->disabled)
488 goto out;
490 if (ent->fill_to_high_water &&
491 ent->available_mrs + ent->pending < 2 * ent->limit &&
492 !READ_ONCE(dev->fill_delay)) {
493 spin_unlock_irq(&ent->lock);
494 err = add_keys(ent, 1);
495 spin_lock_irq(&ent->lock);
496 if (ent->disabled)
497 goto out;
498 if (err) {
500 * EAGAIN only happens if pending is positive, so we
501 * will be rescheduled from create_mkey_callback(). The only
502 * failure path here is ENOMEM.
504 if (err != -EAGAIN) {
505 mlx5_ib_warn(
506 dev,
507 "command failed order %d, err %d\n",
508 ent->order, err);
509 queue_delayed_work(cache->wq, &ent->dwork,
510 msecs_to_jiffies(1000));
513 } else if (ent->available_mrs > 2 * ent->limit) {
514 bool need_delay;
517 * The remove_cache_mr_locked() logic is performed as a garbage
518 * collection task. Such a task is intended to run when no
519 * other active processes are running.
521 * need_resched() returns true if there are user tasks
522 * to be activated in the near future.
524 * In that case we skip remove_cache_mr_locked() and postpone
525 * the garbage collection work to the next cycle, in
526 * order to free CPU resources for other tasks.
528 spin_unlock_irq(&ent->lock);
529 need_delay = need_resched() || someone_adding(cache) ||
530 time_after(jiffies,
531 READ_ONCE(cache->last_add) + 300 * HZ);
532 spin_lock_irq(&ent->lock);
533 if (ent->disabled)
534 goto out;
535 if (need_delay)
536 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
537 remove_cache_mr_locked(ent);
538 queue_adjust_cache_locked(ent);
540 out:
541 spin_unlock_irq(&ent->lock);
544 static void delayed_cache_work_func(struct work_struct *work)
546 struct mlx5_cache_ent *ent;
548 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
549 __cache_work_func(ent);
552 static void cache_work_func(struct work_struct *work)
554 struct mlx5_cache_ent *ent;
556 ent = container_of(work, struct mlx5_cache_ent, work);
557 __cache_work_func(ent);
560 /* Allocate a special entry from the cache */
561 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
562 unsigned int entry, int access_flags)
564 struct mlx5_mr_cache *cache = &dev->cache;
565 struct mlx5_cache_ent *ent;
566 struct mlx5_ib_mr *mr;
568 if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
569 entry >= ARRAY_SIZE(cache->ent)))
570 return ERR_PTR(-EINVAL);
572 /* Matches access in alloc_cache_mr() */
573 if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
574 return ERR_PTR(-EOPNOTSUPP);
576 ent = &cache->ent[entry];
577 spin_lock_irq(&ent->lock);
578 if (list_empty(&ent->head)) {
579 spin_unlock_irq(&ent->lock);
580 mr = create_cache_mr(ent);
581 if (IS_ERR(mr))
582 return mr;
583 } else {
584 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
585 list_del(&mr->list);
586 ent->available_mrs--;
587 queue_adjust_cache_locked(ent);
588 spin_unlock_irq(&ent->lock);
590 mr->access_flags = access_flags;
591 return mr;
594 /* Return a MR already available in the cache */
595 static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent)
597 struct mlx5_ib_dev *dev = req_ent->dev;
598 struct mlx5_ib_mr *mr = NULL;
599 struct mlx5_cache_ent *ent = req_ent;
601 /* Try larger MR pools from the cache to satisfy the allocation */
602 for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
603 mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
604 ent - dev->cache.ent);
606 spin_lock_irq(&ent->lock);
607 if (!list_empty(&ent->head)) {
608 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
609 list);
610 list_del(&mr->list);
611 ent->available_mrs--;
612 queue_adjust_cache_locked(ent);
613 spin_unlock_irq(&ent->lock);
614 break;
616 queue_adjust_cache_locked(ent);
617 spin_unlock_irq(&ent->lock);
620 if (!mr)
621 req_ent->miss++;
623 return mr;
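/*
 * Illustrative note (not original driver text): because the loop above walks
 * from the requested bucket towards the last standard entry, a request that
 * maps to the order-4 bucket may be satisfied from the order-5 (or larger)
 * bucket when its own bucket is empty. A larger-than-needed mkey is harmless:
 * the caller reprograms only the translation entries it actually uses via UMR,
 * and every larger bucket has at least as much room (1 << order entries) as
 * the one originally requested.
 */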
626 static void detach_mr_from_cache(struct mlx5_ib_mr *mr)
628 struct mlx5_cache_ent *ent = mr->cache_ent;
630 mr->cache_ent = NULL;
631 spin_lock_irq(&ent->lock);
632 ent->total_mrs--;
633 spin_unlock_irq(&ent->lock);
636 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
638 struct mlx5_cache_ent *ent = mr->cache_ent;
640 if (!ent)
641 return;
643 if (mlx5_mr_cache_invalidate(mr)) {
644 detach_mr_from_cache(mr);
645 destroy_mkey(dev, mr);
646 kfree(mr);
647 return;
650 spin_lock_irq(&ent->lock);
651 list_add_tail(&mr->list, &ent->head);
652 ent->available_mrs++;
653 queue_adjust_cache_locked(ent);
654 spin_unlock_irq(&ent->lock);
657 static void clean_keys(struct mlx5_ib_dev *dev, int c)
659 struct mlx5_mr_cache *cache = &dev->cache;
660 struct mlx5_cache_ent *ent = &cache->ent[c];
661 struct mlx5_ib_mr *tmp_mr;
662 struct mlx5_ib_mr *mr;
663 LIST_HEAD(del_list);
665 cancel_delayed_work(&ent->dwork);
666 while (1) {
667 spin_lock_irq(&ent->lock);
668 if (list_empty(&ent->head)) {
669 spin_unlock_irq(&ent->lock);
670 break;
672 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
673 list_move(&mr->list, &del_list);
674 ent->available_mrs--;
675 ent->total_mrs--;
676 spin_unlock_irq(&ent->lock);
677 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
680 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
681 list_del(&mr->list);
682 kfree(mr);
686 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
688 if (!mlx5_debugfs_root || dev->is_rep)
689 return;
691 debugfs_remove_recursive(dev->cache.root);
692 dev->cache.root = NULL;
695 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
697 struct mlx5_mr_cache *cache = &dev->cache;
698 struct mlx5_cache_ent *ent;
699 struct dentry *dir;
700 int i;
702 if (!mlx5_debugfs_root || dev->is_rep)
703 return;
705 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
707 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
708 ent = &cache->ent[i];
709 sprintf(ent->name, "%d", ent->order);
710 dir = debugfs_create_dir(ent->name, cache->root);
711 debugfs_create_file("size", 0600, dir, ent, &size_fops);
712 debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
713 debugfs_create_u32("cur", 0400, dir, &ent->available_mrs);
714 debugfs_create_u32("miss", 0600, dir, &ent->miss);
718 static void delay_time_func(struct timer_list *t)
720 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
722 WRITE_ONCE(dev->fill_delay, 0);
725 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
727 struct mlx5_mr_cache *cache = &dev->cache;
728 struct mlx5_cache_ent *ent;
729 int i;
731 mutex_init(&dev->slow_path_mutex);
732 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
733 if (!cache->wq) {
734 mlx5_ib_warn(dev, "failed to create work queue\n");
735 return -ENOMEM;
738 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
739 timer_setup(&dev->delay_timer, delay_time_func, 0);
740 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
741 ent = &cache->ent[i];
742 INIT_LIST_HEAD(&ent->head);
743 spin_lock_init(&ent->lock);
744 ent->order = i + 2;
745 ent->dev = dev;
746 ent->limit = 0;
748 INIT_WORK(&ent->work, cache_work_func);
749 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
751 if (i > MR_CACHE_LAST_STD_ENTRY) {
752 mlx5_odp_init_mr_cache_entry(ent);
753 continue;
756 if (ent->order > mr_cache_max_order(dev))
757 continue;
759 ent->page = PAGE_SHIFT;
760 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
761 MLX5_IB_UMR_OCTOWORD;
762 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
763 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
764 !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
765 mlx5_ib_can_load_pas_with_umr(dev, 0))
766 ent->limit = dev->mdev->profile->mr_cache[i].limit;
767 else
768 ent->limit = 0;
769 spin_lock_irq(&ent->lock);
770 queue_adjust_cache_locked(ent);
771 spin_unlock_irq(&ent->lock);
774 mlx5_mr_cache_debugfs_init(dev);
776 return 0;
779 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
781 unsigned int i;
783 if (!dev->cache.wq)
784 return 0;
786 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
787 struct mlx5_cache_ent *ent = &dev->cache.ent[i];
789 spin_lock_irq(&ent->lock);
790 ent->disabled = true;
791 spin_unlock_irq(&ent->lock);
792 cancel_work_sync(&ent->work);
793 cancel_delayed_work_sync(&ent->dwork);
796 mlx5_mr_cache_debugfs_cleanup(dev);
797 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
799 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
800 clean_keys(dev, i);
802 destroy_workqueue(dev->cache.wq);
803 del_timer_sync(&dev->delay_timer);
805 return 0;
808 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
810 struct mlx5_ib_dev *dev = to_mdev(pd->device);
811 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
812 struct mlx5_ib_mr *mr;
813 void *mkc;
814 u32 *in;
815 int err;
817 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
818 if (!mr)
819 return ERR_PTR(-ENOMEM);
821 in = kzalloc(inlen, GFP_KERNEL);
822 if (!in) {
823 err = -ENOMEM;
824 goto err_free;
827 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
829 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
830 MLX5_SET(mkc, mkc, length64, 1);
831 set_mkc_access_pd_addr_fields(mkc, acc, 0, pd);
833 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
834 if (err)
835 goto err_in;
837 kfree(in);
838 mr->mmkey.type = MLX5_MKEY_MR;
839 mr->ibmr.lkey = mr->mmkey.key;
840 mr->ibmr.rkey = mr->mmkey.key;
841 mr->umem = NULL;
843 return &mr->ibmr;
845 err_in:
846 kfree(in);
848 err_free:
849 kfree(mr);
851 return ERR_PTR(err);
854 static int get_octo_len(u64 addr, u64 len, int page_shift)
856 u64 page_size = 1ULL << page_shift;
857 u64 offset;
858 int npages;
860 offset = addr & (page_size - 1);
861 npages = ALIGN(len + offset, page_size) >> page_shift;
862 return (npages + 1) / 2;
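/*
 * Worked example (not original driver text): with addr = 0x1234,
 * len = 0x10000 and page_shift = 12, offset = 0x234, so len + offset = 0x10234
 * rounds up to 0x11000, i.e. npages = 17. An octoword (16 bytes) holds two
 * 8-byte translation entries, hence the rounded-up division:
 * (17 + 1) / 2 = 9 octowords.
 */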
865 static int mr_cache_max_order(struct mlx5_ib_dev *dev)
867 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
868 return MR_CACHE_LAST_STD_ENTRY + 2;
869 return MLX5_MAX_UMR_SHIFT;
872 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
874 struct mlx5_ib_umr_context *context =
875 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
877 context->status = wc->status;
878 complete(&context->done);
881 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
883 context->cqe.done = mlx5_ib_umr_done;
884 context->status = -1;
885 init_completion(&context->done);
888 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
889 struct mlx5_umr_wr *umrwr)
891 struct umr_common *umrc = &dev->umrc;
892 const struct ib_send_wr *bad;
893 int err;
894 struct mlx5_ib_umr_context umr_context;
896 mlx5_ib_init_umr_context(&umr_context);
897 umrwr->wr.wr_cqe = &umr_context.cqe;
899 down(&umrc->sem);
900 err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
901 if (err) {
902 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
903 } else {
904 wait_for_completion(&umr_context.done);
905 if (umr_context.status != IB_WC_SUCCESS) {
906 mlx5_ib_warn(dev, "reg umr failed (%u)\n",
907 umr_context.status);
908 err = -EFAULT;
911 up(&umrc->sem);
912 return err;
915 static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
916 unsigned int order)
918 struct mlx5_mr_cache *cache = &dev->cache;
920 if (order < cache->ent[0].order)
921 return &cache->ent[0];
922 order = order - cache->ent[0].order;
923 if (order > MR_CACHE_LAST_STD_ENTRY)
924 return NULL;
925 return &cache->ent[order];
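/*
 * Illustrative example (not original driver text): mlx5_mr_cache_init() above
 * sets cache->ent[0].order to 2, so an order-7 request maps to cache->ent[5],
 * anything below order 2 falls back to ent[0], and orders past the last
 * standard entry return NULL so the caller takes the non-cache reg_create()
 * path instead.
 */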
928 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
929 u64 length, int access_flags)
931 mr->ibmr.lkey = mr->mmkey.key;
932 mr->ibmr.rkey = mr->mmkey.key;
933 mr->ibmr.length = length;
934 mr->ibmr.device = &dev->ib_dev;
935 mr->access_flags = access_flags;
938 static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
939 struct ib_umem *umem, u64 iova,
940 int access_flags)
942 struct mlx5_ib_dev *dev = to_mdev(pd->device);
943 struct mlx5_cache_ent *ent;
944 struct mlx5_ib_mr *mr;
945 unsigned int page_size;
947 page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
948 if (WARN_ON(!page_size))
949 return ERR_PTR(-EINVAL);
950 ent = mr_cache_ent_from_order(
951 dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
953 * Matches access in alloc_cache_mr(). If the MR can't come from the
954 * cache then synchronously create an uncached one.
956 if (!ent || ent->limit == 0 ||
957 !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
958 mutex_lock(&dev->slow_path_mutex);
959 mr = reg_create(pd, umem, iova, access_flags, page_size, false);
960 mutex_unlock(&dev->slow_path_mutex);
961 return mr;
964 mr = get_cache_mr(ent);
965 if (!mr) {
966 mr = create_cache_mr(ent);
968 * The above already tried to do the same stuff as reg_create(),
969 * no reason to try it again.
971 if (IS_ERR(mr))
972 return mr;
975 mr->ibmr.pd = pd;
976 mr->umem = umem;
977 mr->access_flags = access_flags;
978 mr->desc_size = sizeof(struct mlx5_mtt);
979 mr->mmkey.iova = iova;
980 mr->mmkey.size = umem->length;
981 mr->mmkey.pd = to_mpd(pd)->pdn;
982 mr->page_shift = order_base_2(page_size);
983 mr->umem = umem;
984 set_mr_fields(dev, mr, umem->length, access_flags);
986 return mr;
989 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
990 MLX5_UMR_MTT_ALIGNMENT)
991 #define MLX5_SPARE_UMR_CHUNK 0x10000
994 * Allocate a temporary buffer to hold the per-page information to transfer to
995 * HW. For efficiency this should be as large as it can be, but buffer
996 * allocation failure is not allowed, so try smaller sizes.
998 static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
1000 const size_t xlt_chunk_align =
1001 MLX5_UMR_MTT_ALIGNMENT / ent_size;
1002 size_t size;
1003 void *res = NULL;
1005 static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
1008 * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
1009 * allocation can't trigger any kind of reclaim.
1011 might_sleep();
1013 gfp_mask |= __GFP_ZERO;
1016 * If the system already has a suitable high order page then just use
1017 * that, but don't try hard to create one. This max is about 1M, so a
1018 * free x86 huge page will satisfy it.
1020 size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
1021 MLX5_MAX_UMR_CHUNK);
1022 *nents = size / ent_size;
1023 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
1024 get_order(size));
1025 if (res)
1026 return res;
1028 if (size > MLX5_SPARE_UMR_CHUNK) {
1029 size = MLX5_SPARE_UMR_CHUNK;
1030 *nents = size / ent_size;
1031 res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
1032 get_order(size));
1033 if (res)
1034 return res;
1037 *nents = PAGE_SIZE / ent_size;
1038 res = (void *)__get_free_page(gfp_mask);
1039 if (res)
1040 return res;
1042 mutex_lock(&xlt_emergency_page_mutex);
1043 memset(xlt_emergency_page, 0, PAGE_SIZE);
1044 return xlt_emergency_page;
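/*
 * Summary of the fallback ladder above (illustrative, not original driver
 * text): first try a single high-order allocation capped at MLX5_MAX_UMR_CHUNK
 * (roughly 1 MiB), then retry with MLX5_SPARE_UMR_CHUNK (64 KiB), then a
 * single page, and finally hand out the pre-allocated xlt_emergency_page under
 * xlt_emergency_page_mutex, so this helper never fails outright.
 */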
1047 static void mlx5_ib_free_xlt(void *xlt, size_t length)
1049 if (xlt == xlt_emergency_page) {
1050 mutex_unlock(&xlt_emergency_page_mutex);
1051 return;
1054 free_pages((unsigned long)xlt, get_order(length));
1058 * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
1059 * submission.
1061 static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
1062 struct mlx5_umr_wr *wr, struct ib_sge *sg,
1063 size_t nents, size_t ent_size,
1064 unsigned int flags)
1066 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
1067 struct device *ddev = &dev->mdev->pdev->dev;
1068 dma_addr_t dma;
1069 void *xlt;
1071 xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
1072 flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
1073 GFP_KERNEL);
1074 sg->length = nents * ent_size;
1075 dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
1076 if (dma_mapping_error(ddev, dma)) {
1077 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
1078 mlx5_ib_free_xlt(xlt, sg->length);
1079 return NULL;
1081 sg->addr = dma;
1082 sg->lkey = dev->umrc.pd->local_dma_lkey;
1084 memset(wr, 0, sizeof(*wr));
1085 wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
1086 if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
1087 wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1088 wr->wr.sg_list = sg;
1089 wr->wr.num_sge = 1;
1090 wr->wr.opcode = MLX5_IB_WR_UMR;
1091 wr->pd = mr->ibmr.pd;
1092 wr->mkey = mr->mmkey.key;
1093 wr->length = mr->mmkey.size;
1094 wr->virt_addr = mr->mmkey.iova;
1095 wr->access_flags = mr->access_flags;
1096 wr->page_shift = mr->page_shift;
1097 wr->xlt_size = sg->length;
1098 return xlt;
1101 static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
1102 struct ib_sge *sg)
1104 struct device *ddev = &dev->mdev->pdev->dev;
1106 dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
1107 mlx5_ib_free_xlt(xlt, sg->length);
1110 static unsigned int xlt_wr_final_send_flags(unsigned int flags)
1112 unsigned int res = 0;
1114 if (flags & MLX5_IB_UPD_XLT_ENABLE)
1115 res |= MLX5_IB_SEND_UMR_ENABLE_MR |
1116 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1117 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1118 if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
1119 res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1120 if (flags & MLX5_IB_UPD_XLT_ADDR)
1121 res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1122 return res;
1125 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
1126 int page_shift, int flags)
1128 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
1129 struct device *ddev = &dev->mdev->pdev->dev;
1130 void *xlt;
1131 struct mlx5_umr_wr wr;
1132 struct ib_sge sg;
1133 int err = 0;
1134 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
1135 ? sizeof(struct mlx5_klm)
1136 : sizeof(struct mlx5_mtt);
1137 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
1138 const int page_mask = page_align - 1;
1139 size_t pages_mapped = 0;
1140 size_t pages_to_map = 0;
1141 size_t pages_iter;
1142 size_t size_to_map = 0;
1143 size_t orig_sg_length;
1145 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
1146 !umr_can_use_indirect_mkey(dev))
1147 return -EPERM;
1149 if (WARN_ON(!mr->umem->is_odp))
1150 return -EINVAL;
1152 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
1153 * so we need to align the offset and length accordingly
1155 if (idx & page_mask) {
1156 npages += idx & page_mask;
1157 idx &= ~page_mask;
1159 pages_to_map = ALIGN(npages, page_align);
1161 xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
1162 if (!xlt)
1163 return -ENOMEM;
1164 pages_iter = sg.length / desc_size;
1165 orig_sg_length = sg.length;
1167 if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
1168 struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
1169 size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
1171 pages_to_map = min_t(size_t, pages_to_map, max_pages);
1174 wr.page_shift = page_shift;
1176 for (pages_mapped = 0;
1177 pages_mapped < pages_to_map && !err;
1178 pages_mapped += pages_iter, idx += pages_iter) {
1179 npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1180 size_to_map = npages * desc_size;
1181 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
1182 DMA_TO_DEVICE);
1183 mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
1184 dma_sync_single_for_device(ddev, sg.addr, sg.length,
1185 DMA_TO_DEVICE);
1187 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
1189 if (pages_mapped + pages_iter >= pages_to_map)
1190 wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
1192 wr.offset = idx * desc_size;
1193 wr.xlt_size = sg.length;
1195 err = mlx5_ib_post_send_wait(dev, &wr);
1197 sg.length = orig_sg_length;
1198 mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
1199 return err;
1203 * Send the DMA list to the HW for a normal MR using UMR.
1205 static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
1207 struct mlx5_ib_dev *dev = mr_to_mdev(mr);
1208 struct device *ddev = &dev->mdev->pdev->dev;
1209 struct ib_block_iter biter;
1210 struct mlx5_mtt *cur_mtt;
1211 struct mlx5_umr_wr wr;
1212 size_t orig_sg_length;
1213 struct mlx5_mtt *mtt;
1214 size_t final_size;
1215 struct ib_sge sg;
1216 int err = 0;
1218 if (WARN_ON(mr->umem->is_odp))
1219 return -EINVAL;
1221 mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
1222 ib_umem_num_dma_blocks(mr->umem,
1223 1 << mr->page_shift),
1224 sizeof(*mtt), flags);
1225 if (!mtt)
1226 return -ENOMEM;
1227 orig_sg_length = sg.length;
1229 cur_mtt = mtt;
1230 rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap,
1231 BIT(mr->page_shift)) {
1232 if (cur_mtt == (void *)mtt + sg.length) {
1233 dma_sync_single_for_device(ddev, sg.addr, sg.length,
1234 DMA_TO_DEVICE);
1235 err = mlx5_ib_post_send_wait(dev, &wr);
1236 if (err)
1237 goto err;
1238 dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
1239 DMA_TO_DEVICE);
1240 wr.offset += sg.length;
1241 cur_mtt = mtt;
1244 cur_mtt->ptag =
1245 cpu_to_be64(rdma_block_iter_dma_address(&biter) |
1246 MLX5_IB_MTT_PRESENT);
1247 cur_mtt++;
1250 final_size = (void *)cur_mtt - (void *)mtt;
1251 sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
1252 memset(cur_mtt, 0, sg.length - final_size);
1253 wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
1254 wr.xlt_size = sg.length;
1256 dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
1257 err = mlx5_ib_post_send_wait(dev, &wr);
1259 err:
1260 sg.length = orig_sg_length;
1261 mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
1262 return err;
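/*
 * Illustrative example (not original driver text): registering 2 MiB at a
 * 4 KiB page size needs 512 8-byte struct mlx5_mtt entries, i.e. 4 KiB of XLT,
 * which fits in one buffer, so only the final UMR post above is issued. A
 * registration larger than the buffer wraps the loop: each time cur_mtt
 * reaches the end of the buffer a UMR is posted and wr.offset advances before
 * the buffer is refilled.
 */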
1266 * Build an mkey without going through the MR cache. If populate is set the
1267 * PAS list is written at creation time; otherwise the mkey is left free for UMR.
1269 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
1270 u64 iova, int access_flags,
1271 unsigned int page_size, bool populate)
1273 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1274 struct mlx5_ib_mr *mr;
1275 __be64 *pas;
1276 void *mkc;
1277 int inlen;
1278 u32 *in;
1279 int err;
1280 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1282 if (!page_size)
1283 return ERR_PTR(-EINVAL);
1284 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1285 if (!mr)
1286 return ERR_PTR(-ENOMEM);
1288 mr->ibmr.pd = pd;
1289 mr->access_flags = access_flags;
1290 mr->page_shift = order_base_2(page_size);
1292 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1293 if (populate)
1294 inlen += sizeof(*pas) *
1295 roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
1296 in = kvzalloc(inlen, GFP_KERNEL);
1297 if (!in) {
1298 err = -ENOMEM;
1299 goto err_1;
1301 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1302 if (populate) {
1303 if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
1304 err = -EINVAL;
1305 goto err_2;
1307 mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
1308 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1311 /* The pg_access bit allows setting the access flags
1312 * in the page list submitted with the command. */
1313 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1315 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1316 set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
1317 populate ? pd : dev->umrc.pd);
1318 MLX5_SET(mkc, mkc, free, !populate);
1319 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1320 MLX5_SET(mkc, mkc, umr_en, 1);
1322 MLX5_SET64(mkc, mkc, len, umem->length);
1323 MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1324 MLX5_SET(mkc, mkc, translations_octword_size,
1325 get_octo_len(iova, umem->length, mr->page_shift));
1326 MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
1327 if (populate) {
1328 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1329 get_octo_len(iova, umem->length, mr->page_shift));
1332 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1333 if (err) {
1334 mlx5_ib_warn(dev, "create mkey failed\n");
1335 goto err_2;
1337 mr->mmkey.type = MLX5_MKEY_MR;
1338 mr->desc_size = sizeof(struct mlx5_mtt);
1339 mr->umem = umem;
1340 set_mr_fields(dev, mr, umem->length, access_flags);
1341 kvfree(in);
1343 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1345 return mr;
1347 err_2:
1348 kvfree(in);
1349 err_1:
1350 kfree(mr);
1351 return ERR_PTR(err);
1354 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1355 u64 length, int acc, int mode)
1357 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1358 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1359 struct mlx5_ib_mr *mr;
1360 void *mkc;
1361 u32 *in;
1362 int err;
1364 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1365 if (!mr)
1366 return ERR_PTR(-ENOMEM);
1368 in = kzalloc(inlen, GFP_KERNEL);
1369 if (!in) {
1370 err = -ENOMEM;
1371 goto err_free;
1374 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1376 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1377 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1378 MLX5_SET64(mkc, mkc, len, length);
1379 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
1381 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1382 if (err)
1383 goto err_in;
1385 kfree(in);
1387 set_mr_fields(dev, mr, length, acc);
1389 return &mr->ibmr;
1391 err_in:
1392 kfree(in);
1394 err_free:
1395 kfree(mr);
1397 return ERR_PTR(err);
1400 int mlx5_ib_advise_mr(struct ib_pd *pd,
1401 enum ib_uverbs_advise_mr_advice advice,
1402 u32 flags,
1403 struct ib_sge *sg_list,
1404 u32 num_sge,
1405 struct uverbs_attr_bundle *attrs)
1407 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1408 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
1409 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
1410 return -EOPNOTSUPP;
1412 return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1413 sg_list, num_sge);
1416 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1417 struct ib_dm_mr_attr *attr,
1418 struct uverbs_attr_bundle *attrs)
1420 struct mlx5_ib_dm *mdm = to_mdm(dm);
1421 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1422 u64 start_addr = mdm->dev_addr + attr->offset;
1423 int mode;
1425 switch (mdm->type) {
1426 case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1427 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1428 return ERR_PTR(-EINVAL);
1430 mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1431 start_addr -= pci_resource_start(dev->pdev, 0);
1432 break;
1433 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1434 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1435 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1436 return ERR_PTR(-EINVAL);
1438 mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1439 break;
1440 default:
1441 return ERR_PTR(-EINVAL);
1444 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1445 attr->access_flags, mode);
1448 static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
1449 u64 iova, int access_flags)
1451 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1452 struct mlx5_ib_mr *mr = NULL;
1453 bool xlt_with_umr;
1454 int err;
1456 xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
1457 if (xlt_with_umr) {
1458 mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
1459 } else {
1460 unsigned int page_size = mlx5_umem_find_best_pgsz(
1461 umem, mkc, log_page_size, 0, iova);
1463 mutex_lock(&dev->slow_path_mutex);
1464 mr = reg_create(pd, umem, iova, access_flags, page_size, true);
1465 mutex_unlock(&dev->slow_path_mutex);
1467 if (IS_ERR(mr)) {
1468 ib_umem_release(umem);
1469 return ERR_CAST(mr);
1472 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1474 atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1476 if (xlt_with_umr) {
1478 * If the MR was created with reg_create then it will be
1479 * configured properly but left disabled. It is safe to go ahead
1480 * and configure it again via UMR while enabling it.
1482 err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
1483 if (err) {
1484 dereg_mr(dev, mr);
1485 return ERR_PTR(err);
1488 return &mr->ibmr;
1491 static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
1492 u64 iova, int access_flags,
1493 struct ib_udata *udata)
1495 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1496 struct ib_umem_odp *odp;
1497 struct mlx5_ib_mr *mr;
1498 int err;
1500 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1501 return ERR_PTR(-EOPNOTSUPP);
1503 if (!start && length == U64_MAX) {
1504 if (iova != 0)
1505 return ERR_PTR(-EINVAL);
1506 if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1507 return ERR_PTR(-EINVAL);
1509 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
1510 if (IS_ERR(mr))
1511 return ERR_CAST(mr);
1512 return &mr->ibmr;
1515 /* ODP requires xlt update via umr to work. */
1516 if (!mlx5_ib_can_load_pas_with_umr(dev, length))
1517 return ERR_PTR(-EINVAL);
1519 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
1520 &mlx5_mn_ops);
1521 if (IS_ERR(odp))
1522 return ERR_CAST(odp);
1524 mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
1525 if (IS_ERR(mr)) {
1526 ib_umem_release(&odp->umem);
1527 return ERR_CAST(mr);
1530 odp->private = mr;
1531 init_waitqueue_head(&mr->q_deferred_work);
1532 atomic_set(&mr->num_deferred_work, 0);
1533 err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
1534 &mr->mmkey, GFP_KERNEL));
1535 if (err)
1536 goto err_dereg_mr;
1538 err = mlx5_ib_init_odp_mr(mr);
1539 if (err)
1540 goto err_dereg_mr;
1541 return &mr->ibmr;
1543 err_dereg_mr:
1544 dereg_mr(dev, mr);
1545 return ERR_PTR(err);
1548 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1549 u64 iova, int access_flags,
1550 struct ib_udata *udata)
1552 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1553 struct ib_umem *umem;
1555 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1556 return ERR_PTR(-EOPNOTSUPP);
1558 mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1559 start, iova, length, access_flags);
1561 if (access_flags & IB_ACCESS_ON_DEMAND)
1562 return create_user_odp_mr(pd, start, length, iova, access_flags,
1563 udata);
1564 umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
1565 if (IS_ERR(umem))
1566 return ERR_CAST(umem);
1567 return create_real_mr(pd, umem, iova, access_flags);
1571 * mlx5_mr_cache_invalidate - Fence all DMA on the MR
1572 * @mr: The MR to fence
1574 * Upon return the NIC will not be doing any DMA to the pages under the MR,
1575 * and any DMA in progress will be completed. Failure of this function
1576 * indicates the HW has failed catastrophically.
1578 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr)
1580 struct mlx5_umr_wr umrwr = {};
1582 if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1583 return 0;
1585 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1586 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1587 umrwr.wr.opcode = MLX5_IB_WR_UMR;
1588 umrwr.pd = mr_to_mdev(mr)->umrc.pd;
1589 umrwr.mkey = mr->mmkey.key;
1590 umrwr.ignore_free_state = 1;
1592 return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
1596 * True if the change in access flags can be done via UMR; only some access
1597 * flags can be updated.
1599 static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
1600 unsigned int current_access_flags,
1601 unsigned int target_access_flags)
1603 unsigned int diffs = current_access_flags ^ target_access_flags;
1605 if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
1606 IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
1607 return false;
1608 return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
1609 target_access_flags);
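/*
 * Illustrative example (not original driver text): toggling
 * IB_ACCESS_REMOTE_WRITE only differs inside the mask above, so it is a
 * candidate for the UMR fast path (still subject to
 * mlx5_ib_can_reconfig_with_umr()), whereas adding IB_ACCESS_ON_DEMAND differs
 * outside the mask, returns false, and forces rereg to build a new mkey
 * instead.
 */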
1612 static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1613 int access_flags)
1615 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1616 struct mlx5_umr_wr umrwr = {
1617 .wr = {
1618 .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
1619 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
1620 .opcode = MLX5_IB_WR_UMR,
1622 .mkey = mr->mmkey.key,
1623 .pd = pd,
1624 .access_flags = access_flags,
1626 int err;
1628 err = mlx5_ib_post_send_wait(dev, &umrwr);
1629 if (err)
1630 return err;
1632 mr->access_flags = access_flags;
1633 mr->mmkey.pd = to_mpd(pd)->pdn;
1634 return 0;
1637 static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
1638 struct ib_umem *new_umem,
1639 int new_access_flags, u64 iova,
1640 unsigned long *page_size)
1642 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1644 /* We only track the allocated sizes of MRs from the cache */
1645 if (!mr->cache_ent)
1646 return false;
1647 if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
1648 return false;
1650 *page_size =
1651 mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
1652 if (WARN_ON(!*page_size))
1653 return false;
1654 return (1ULL << mr->cache_ent->order) >=
1655 ib_umem_num_dma_blocks(new_umem, *page_size);
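/*
 * Illustrative example (not original driver text): an MR handed out from the
 * order-9 cache bucket has room for 1 << 9 = 512 translation entries, so it
 * can be re-pointed at any new umem needing at most 512 DMA blocks at the page
 * size chosen above; anything larger fails this check and goes through a full
 * re-registration instead.
 */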
1658 static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
1659 int access_flags, int flags, struct ib_umem *new_umem,
1660 u64 iova, unsigned long page_size)
1662 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1663 int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
1664 struct ib_umem *old_umem = mr->umem;
1665 int err;
1668 * To keep everything simple the MR is revoked before we start to mess
1669 * with it. This ensures the change is atomic relative to any use of the
1670 * MR.
1672 err = mlx5_mr_cache_invalidate(mr);
1673 if (err)
1674 return err;
1676 if (flags & IB_MR_REREG_PD) {
1677 mr->ibmr.pd = pd;
1678 mr->mmkey.pd = to_mpd(pd)->pdn;
1679 upd_flags |= MLX5_IB_UPD_XLT_PD;
1681 if (flags & IB_MR_REREG_ACCESS) {
1682 mr->access_flags = access_flags;
1683 upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1686 mr->ibmr.length = new_umem->length;
1687 mr->mmkey.iova = iova;
1688 mr->mmkey.size = new_umem->length;
1689 mr->page_shift = order_base_2(page_size);
1690 mr->umem = new_umem;
1691 err = mlx5_ib_update_mr_pas(mr, upd_flags);
1692 if (err) {
1694 * The MR is revoked at this point so there is no issue to free
1695 * new_umem.
1697 mr->umem = old_umem;
1698 return err;
1701 atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
1702 ib_umem_release(old_umem);
1703 atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
1704 return 0;
1707 struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1708 u64 length, u64 iova, int new_access_flags,
1709 struct ib_pd *new_pd,
1710 struct ib_udata *udata)
1712 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1713 struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1714 int err;
1716 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1717 return ERR_PTR(-EOPNOTSUPP);
1719 mlx5_ib_dbg(
1720 dev,
1721 "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
1722 start, iova, length, new_access_flags);
1724 if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
1725 return ERR_PTR(-EOPNOTSUPP);
1727 if (!(flags & IB_MR_REREG_ACCESS))
1728 new_access_flags = mr->access_flags;
1729 if (!(flags & IB_MR_REREG_PD))
1730 new_pd = ib_mr->pd;
1732 if (!(flags & IB_MR_REREG_TRANS)) {
1733 struct ib_umem *umem;
1735 /* Fast path for PD/access change */
1736 if (can_use_umr_rereg_access(dev, mr->access_flags,
1737 new_access_flags)) {
1738 err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
1739 if (err)
1740 return ERR_PTR(err);
1741 return NULL;
1743 /* DM or ODP MRs don't have a umem, so we can't reuse it */
1744 if (!mr->umem || is_odp_mr(mr))
1745 goto recreate;
1748 * Only one active MR can refer to a umem at one time, revoke
1749 * the old MR before assigning the umem to the new one.
1751 err = mlx5_mr_cache_invalidate(mr);
1752 if (err)
1753 return ERR_PTR(err);
1754 umem = mr->umem;
1755 mr->umem = NULL;
1756 atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
1758 return create_real_mr(new_pd, umem, mr->mmkey.iova,
1759 new_access_flags);
1763 * DM doesn't have a PAS list, so we can't reuse it; ODP does, but the
1764 * logic around releasing the umem is different
1766 if (!mr->umem || is_odp_mr(mr))
1767 goto recreate;
1769 if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
1770 can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
1771 struct ib_umem *new_umem;
1772 unsigned long page_size;
1774 new_umem = ib_umem_get(&dev->ib_dev, start, length,
1775 new_access_flags);
1776 if (IS_ERR(new_umem))
1777 return ERR_CAST(new_umem);
1779 /* Fast path for PAS change */
1780 if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
1781 &page_size)) {
1782 err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
1783 new_umem, iova, page_size);
1784 if (err) {
1785 ib_umem_release(new_umem);
1786 return ERR_PTR(err);
1788 return NULL;
1790 return create_real_mr(new_pd, new_umem, iova, new_access_flags);
1794 * Everything else has no state we can preserve, just create a new MR
1795 * from scratch
1797 recreate:
1798 return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
1799 new_access_flags, udata);
1802 static int
1803 mlx5_alloc_priv_descs(struct ib_device *device,
1804 struct mlx5_ib_mr *mr,
1805 int ndescs,
1806 int desc_size)
1808 struct mlx5_ib_dev *dev = to_mdev(device);
1809 struct device *ddev = &dev->mdev->pdev->dev;
1810 int size = ndescs * desc_size;
1811 int add_size;
1812 int ret;
1814 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1816 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1817 if (!mr->descs_alloc)
1818 return -ENOMEM;
1820 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1822 mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
1823 if (dma_mapping_error(ddev, mr->desc_map)) {
1824 ret = -ENOMEM;
1825 goto err;
1828 return 0;
1829 err:
1830 kfree(mr->descs_alloc);
1832 return ret;
1835 static void
1836 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1838 if (mr->descs) {
1839 struct ib_device *device = mr->ibmr.device;
1840 int size = mr->max_descs * mr->desc_size;
1841 struct mlx5_ib_dev *dev = to_mdev(device);
1843 dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
1844 DMA_TO_DEVICE);
1845 kfree(mr->descs_alloc);
1846 mr->descs = NULL;
1850 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1852 if (mr->sig) {
1853 if (mlx5_core_destroy_psv(dev->mdev,
1854 mr->sig->psv_memory.psv_idx))
1855 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1856 mr->sig->psv_memory.psv_idx);
1857 if (mlx5_core_destroy_psv(dev->mdev,
1858 mr->sig->psv_wire.psv_idx))
1859 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1860 mr->sig->psv_wire.psv_idx);
1861 xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key));
1862 kfree(mr->sig);
1863 mr->sig = NULL;
1866 if (!mr->cache_ent) {
1867 destroy_mkey(dev, mr);
1868 mlx5_free_priv_descs(mr);
1872 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1874 struct ib_umem *umem = mr->umem;
1876 /* Stop all DMA */
1877 if (is_odp_mr(mr))
1878 mlx5_ib_fence_odp_mr(mr);
1879 else
1880 clean_mr(dev, mr);
1882 if (umem) {
1883 if (!is_odp_mr(mr))
1884 atomic_sub(ib_umem_num_pages(umem),
1885 &dev->mdev->priv.reg_pages);
1886 ib_umem_release(umem);
1889 if (mr->cache_ent)
1890 mlx5_mr_cache_free(dev, mr);
1891 else
1892 kfree(mr);
1895 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1897 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1899 if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1900 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
1901 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
1904 if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) {
1905 mlx5_ib_free_implicit_mr(mmr);
1906 return 0;
1909 dereg_mr(to_mdev(ibmr->device), mmr);
1911 return 0;
1914 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1915 int access_mode, int page_shift)
1917 void *mkc;
1919 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1921 /* This is only used from the kernel, so setting the PD is OK. */
1922 set_mkc_access_pd_addr_fields(mkc, 0, 0, pd);
1923 MLX5_SET(mkc, mkc, free, 1);
1924 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1925 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1926 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1927 MLX5_SET(mkc, mkc, umr_en, 1);
1928 MLX5_SET(mkc, mkc, log_page_size, page_shift);
1931 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1932 int ndescs, int desc_size, int page_shift,
1933 int access_mode, u32 *in, int inlen)
1935 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1936 int err;
1938 mr->access_mode = access_mode;
1939 mr->desc_size = desc_size;
1940 mr->max_descs = ndescs;
1942 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1943 if (err)
1944 return err;
1946 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1948 err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
1949 if (err)
1950 goto err_free_descs;
1952 mr->mmkey.type = MLX5_MKEY_MR;
1953 mr->ibmr.lkey = mr->mmkey.key;
1954 mr->ibmr.rkey = mr->mmkey.key;
1956 return 0;
1958 err_free_descs:
1959 mlx5_free_priv_descs(mr);
1960 return err;
1963 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1964 u32 max_num_sg, u32 max_num_meta_sg,
1965 int desc_size, int access_mode)
1967 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1968 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1969 int page_shift = 0;
1970 struct mlx5_ib_mr *mr;
1971 u32 *in;
1972 int err;
1974 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1975 if (!mr)
1976 return ERR_PTR(-ENOMEM);
1978 mr->ibmr.pd = pd;
1979 mr->ibmr.device = pd->device;
1981 in = kzalloc(inlen, GFP_KERNEL);
1982 if (!in) {
1983 err = -ENOMEM;
1984 goto err_free;
1987 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1988 page_shift = PAGE_SHIFT;
1990 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1991 access_mode, in, inlen);
1992 if (err)
1993 goto err_free_in;
1995 mr->umem = NULL;
1996 kfree(in);
1998 return mr;
2000 err_free_in:
2001 kfree(in);
2002 err_free:
2003 kfree(mr);
2004 return ERR_PTR(err);
2007 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2008 int ndescs, u32 *in, int inlen)
2010 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
2011 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
2012 inlen);
2015 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2016 int ndescs, u32 *in, int inlen)
2018 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
2019 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
2022 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
2023 int max_num_sg, int max_num_meta_sg,
2024 u32 *in, int inlen)
2026 struct mlx5_ib_dev *dev = to_mdev(pd->device);
2027 u32 psv_index[2];
2028 void *mkc;
2029 int err;
2031 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
2032 if (!mr->sig)
2033 return -ENOMEM;
2035 /* create mem & wire PSVs */
2036 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
2037 if (err)
2038 goto err_free_sig;
2040 mr->sig->psv_memory.psv_idx = psv_index[0];
2041 mr->sig->psv_wire.psv_idx = psv_index[1];
2043 mr->sig->sig_status_checked = true;
2044 mr->sig->sig_err_exists = false;
2045 /* Next UMR, Arm SIGERR */
2046 ++mr->sig->sigerr_count;
2047 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
2048 sizeof(struct mlx5_klm),
2049 MLX5_MKC_ACCESS_MODE_KLMS);
2050 if (IS_ERR(mr->klm_mr)) {
2051 err = PTR_ERR(mr->klm_mr);
2052 goto err_destroy_psv;
2054 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
2055 sizeof(struct mlx5_mtt),
2056 MLX5_MKC_ACCESS_MODE_MTT);
2057 if (IS_ERR(mr->mtt_mr)) {
2058 err = PTR_ERR(mr->mtt_mr);
2059 goto err_free_klm_mr;
2062 /* Set bsf descriptors for mkey */
2063 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2064 MLX5_SET(mkc, mkc, bsf_en, 1);
2065 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
2067 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
2068 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
2069 if (err)
2070 goto err_free_mtt_mr;
2072 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
2073 mr->sig, GFP_KERNEL));
2074 if (err)
2075 goto err_free_descs;
2076 return 0;
2078 err_free_descs:
2079 destroy_mkey(dev, mr);
2080 mlx5_free_priv_descs(mr);
2081 err_free_mtt_mr:
2082 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
2083 mr->mtt_mr = NULL;
2084 err_free_klm_mr:
2085 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
2086 mr->klm_mr = NULL;
2087 err_destroy_psv:
2088 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
2089 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
2090 mr->sig->psv_memory.psv_idx);
2091 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
2092 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
2093 mr->sig->psv_wire.psv_idx);
2094 err_free_sig:
2095 kfree(mr->sig);
2097 return err;
2098 }
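/*
 * In short, the integrity MR built above is a composite: a signature
 * context (mr->sig) with two PSVs (memory and wire domains), two internal
 * PI MRs used for the actual data/metadata mapping (mr->mtt_mr and
 * mr->klm_mr), and a small BSF-enabled KLM mkey that stitches them
 * together.  The mkey is also stored in dev->sig_mrs, presumably so the
 * signature-error handling path can find mr->sig by key.
 */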
2100 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
2101 enum ib_mr_type mr_type, u32 max_num_sg,
2102 u32 max_num_meta_sg)
2103 {
2104 struct mlx5_ib_dev *dev = to_mdev(pd->device);
2105 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
2106 int ndescs = ALIGN(max_num_sg, 4);
2107 struct mlx5_ib_mr *mr;
2108 u32 *in;
2109 int err;
2111 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2112 if (!mr)
2113 return ERR_PTR(-ENOMEM);
2115 in = kzalloc(inlen, GFP_KERNEL);
2116 if (!in) {
2117 err = -ENOMEM;
2118 goto err_free;
2119 }
2121 mr->ibmr.device = pd->device;
2122 mr->umem = NULL;
2124 switch (mr_type) {
2125 case IB_MR_TYPE_MEM_REG:
2126 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
2127 break;
2128 case IB_MR_TYPE_SG_GAPS:
2129 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
2130 break;
2131 case IB_MR_TYPE_INTEGRITY:
2132 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
2133 max_num_meta_sg, in, inlen);
2134 break;
2135 default:
2136 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
2137 err = -EINVAL;
2138 }
2140 if (err)
2141 goto err_free_in;
2143 kfree(in);
2145 return &mr->ibmr;
2147 err_free_in:
2148 kfree(in);
2149 err_free:
2150 kfree(mr);
2151 return ERR_PTR(err);
2152 }
2154 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
2155 u32 max_num_sg)
2156 {
2157 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
2158 }
2160 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
2161 u32 max_num_sg, u32 max_num_meta_sg)
2162 {
2163 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
2164 max_num_meta_sg);
2165 }
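/*
 * Illustrative only (not part of this file): a ULP would normally reach
 * the integrity path through the core verbs, roughly:
 *
 *	mr = ib_alloc_mr_integrity(pd, max_num_sg, max_num_meta_sg);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 *	... later: ib_map_mr_sg_pi() + an IB_WR_REG_MR_INTEGRITY WR ...
 *
 * Callers typically check for integrity support (e.g. the
 * IB_DEVICE_INTEGRITY_HANDOVER capability) before taking this path.
 */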
2167 int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
2168 {
2169 struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
2170 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
2171 struct mlx5_ib_mw *mw = to_mmw(ibmw);
2172 u32 *in = NULL;
2173 void *mkc;
2174 int ndescs;
2175 int err;
2176 struct mlx5_ib_alloc_mw req = {};
2177 struct {
2178 __u32 comp_mask;
2179 __u32 response_length;
2180 } resp = {};
2182 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
2183 if (err)
2184 return err;
2186 if (req.comp_mask || req.reserved1 || req.reserved2)
2187 return -EOPNOTSUPP;
2189 if (udata->inlen > sizeof(req) &&
2190 !ib_is_udata_cleared(udata, sizeof(req),
2191 udata->inlen - sizeof(req)))
2192 return -EOPNOTSUPP;
2194 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
2196 in = kzalloc(inlen, GFP_KERNEL);
2197 if (!in) {
2198 err = -ENOMEM;
2199 goto free;
2200 }
2202 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
2204 MLX5_SET(mkc, mkc, free, 1);
2205 MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
2206 MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
2207 MLX5_SET(mkc, mkc, umr_en, 1);
2208 MLX5_SET(mkc, mkc, lr, 1);
2209 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
2210 MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
2211 MLX5_SET(mkc, mkc, qpn, 0xffffff);
2213 err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
2214 if (err)
2215 goto free;
2217 mw->mmkey.type = MLX5_MKEY_MW;
2218 ibmw->rkey = mw->mmkey.key;
2219 mw->ndescs = ndescs;
2221 resp.response_length =
2222 min(offsetofend(typeof(resp), response_length), udata->outlen);
2223 if (resp.response_length) {
2224 err = ib_copy_to_udata(udata, &resp, resp.response_length);
2225 if (err)
2226 goto free_mkey;
2227 }
2229 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
2230 err = xa_err(xa_store(&dev->odp_mkeys,
2231 mlx5_base_mkey(mw->mmkey.key), &mw->mmkey,
2232 GFP_KERNEL));
2233 if (err)
2234 goto free_mkey;
2235 }
2237 kfree(in);
2238 return 0;
2240 free_mkey:
2241 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
2242 free:
2243 kfree(in);
2244 return err;
2245 }
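/*
 * Note on the ODP xarray insert above: when on-demand paging is enabled,
 * the MW's mkey is published in dev->odp_mkeys so that the page-fault
 * handler can resolve faults on WQEs that reference this MW; the matching
 * xa_erase() + synchronize_srcu() is in mlx5_ib_dealloc_mw() below.
 */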
2247 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
2248 {
2249 struct mlx5_ib_dev *dev = to_mdev(mw->device);
2250 struct mlx5_ib_mw *mmw = to_mmw(mw);
2252 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
2253 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key));
2254 /*
2255 * pagefault_single_data_segment() may be accessing mmw under
2256 * SRCU if the user bound an ODP MR to this MW.
2257 */
2258 synchronize_srcu(&dev->odp_srcu);
2259 }
2261 return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
2262 }
2264 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
2265 struct ib_mr_status *mr_status)
2266 {
2267 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
2268 int ret = 0;
2270 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
2271 pr_err("Invalid status check mask\n");
2272 ret = -EINVAL;
2273 goto done;
2274 }
2276 mr_status->fail_status = 0;
2277 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2278 if (!mmr->sig) {
2279 ret = -EINVAL;
2280 pr_err("signature status check requested on a non-signature enabled MR\n");
2281 goto done;
2282 }
2284 mmr->sig->sig_status_checked = true;
2285 if (!mmr->sig->sig_err_exists)
2286 goto done;
2288 if (ibmr->lkey == mmr->sig->err_item.key)
2289 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2290 sizeof(mr_status->sig_err));
2291 else {
2292 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2293 mr_status->sig_err.sig_err_offset = 0;
2294 mr_status->sig_err.key = mmr->sig->err_item.key;
2295 }
2297 mmr->sig->sig_err_exists = false;
2298 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2299 }
2301 done:
2302 return ret;
2303 }
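/*
 * Illustrative only (not part of this file): after an integrity transfer
 * completes, a ULP would typically poll the signature status roughly as:
 *
 *	struct ib_mr_status mr_status;
 *
 *	if (!ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &mr_status) &&
 *	    (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS))
 *		pr_err("sig err: type %d expected 0x%x actual 0x%x off %llu\n",
 *		       mr_status.sig_err.err_type,
 *		       mr_status.sig_err.expected,
 *		       mr_status.sig_err.actual,
 *		       mr_status.sig_err.sig_err_offset);
 */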
2305 static int
2306 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2307 int data_sg_nents, unsigned int *data_sg_offset,
2308 struct scatterlist *meta_sg, int meta_sg_nents,
2309 unsigned int *meta_sg_offset)
2310 {
2311 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2312 unsigned int sg_offset = 0;
2313 int n = 0;
2315 mr->meta_length = 0;
2316 if (data_sg_nents == 1) {
2317 n++;
2318 mr->ndescs = 1;
2319 if (data_sg_offset)
2320 sg_offset = *data_sg_offset;
2321 mr->data_length = sg_dma_len(data_sg) - sg_offset;
2322 mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2323 if (meta_sg_nents == 1) {
2324 n++;
2325 mr->meta_ndescs = 1;
2326 if (meta_sg_offset)
2327 sg_offset = *meta_sg_offset;
2328 else
2329 sg_offset = 0;
2330 mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2331 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2332 }
2333 ibmr->length = mr->data_length + mr->meta_length;
2334 }
2336 return n;
2337 }
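/*
 * The PA path above only succeeds when the data (and, if present, the
 * metadata) each fit in a single DMA-mapped SG entry; in that case the
 * buffers can be described directly with the PD's local_dma_lkey and no
 * UMR registration is needed.  Otherwise the caller falls back to the
 * MTT/KLM paths below.
 */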
2339 static int
2340 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2341 struct scatterlist *sgl,
2342 unsigned short sg_nents,
2343 unsigned int *sg_offset_p,
2344 struct scatterlist *meta_sgl,
2345 unsigned short meta_sg_nents,
2346 unsigned int *meta_sg_offset_p)
2347 {
2348 struct scatterlist *sg = sgl;
2349 struct mlx5_klm *klms = mr->descs;
2350 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2351 u32 lkey = mr->ibmr.pd->local_dma_lkey;
2352 int i, j = 0;
2354 mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2355 mr->ibmr.length = 0;
2357 for_each_sg(sgl, sg, sg_nents, i) {
2358 if (unlikely(i >= mr->max_descs))
2359 break;
2360 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2361 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2362 klms[i].key = cpu_to_be32(lkey);
2363 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2365 sg_offset = 0;
2366 }
2368 if (sg_offset_p)
2369 *sg_offset_p = sg_offset;
2371 mr->ndescs = i;
2372 mr->data_length = mr->ibmr.length;
2374 if (meta_sg_nents) {
2375 sg = meta_sgl;
2376 sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2377 for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2378 if (unlikely(i + j >= mr->max_descs))
2379 break;
2380 klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2381 sg_offset);
2382 klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2383 sg_offset);
2384 klms[i + j].key = cpu_to_be32(lkey);
2385 mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2387 sg_offset = 0;
2388 }
2389 if (meta_sg_offset_p)
2390 *meta_sg_offset_p = sg_offset;
2392 mr->meta_ndescs = j;
2393 mr->meta_length = mr->ibmr.length - mr->data_length;
2394 }
2396 return i + j;
2397 }
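/*
 * Rough example of the KLM layout produced above (values invented for
 * illustration): a data SG list of two entries of 0x1000 and 0x200 bytes,
 * followed by one 8-byte PI entry, yields
 *
 *	klms[0] = { .va = data0, .bcount = 0x1000, .key = local_dma_lkey }
 *	klms[1] = { .va = data1, .bcount = 0x0200, .key = local_dma_lkey }
 *	klms[2] = { .va = pi0,   .bcount = 0x0008, .key = local_dma_lkey }
 *
 * with ndescs = 2, meta_ndescs = 1, data_length = 0x1200 and
 * meta_length = 8.
 */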
2399 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2400 {
2401 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2402 __be64 *descs;
2404 if (unlikely(mr->ndescs == mr->max_descs))
2405 return -ENOMEM;
2407 descs = mr->descs;
2408 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2410 return 0;
2411 }
2413 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2414 {
2415 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2416 __be64 *descs;
2418 if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
2419 return -ENOMEM;
2421 descs = mr->descs;
2422 descs[mr->ndescs + mr->meta_ndescs++] =
2423 cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2425 return 0;
2426 }
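/*
 * mlx5_set_page() packs data pages at the start of the descriptor array,
 * while mlx5_set_page_pi() appends metadata pages after the already
 * mapped data pages (at index ndescs + meta_ndescs); both encode the page
 * address together with the MLX5_EN_RD/MLX5_EN_WR access bits of an MTT
 * entry.
 */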
2428 static int
2429 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2430 int data_sg_nents, unsigned int *data_sg_offset,
2431 struct scatterlist *meta_sg, int meta_sg_nents,
2432 unsigned int *meta_sg_offset)
2433 {
2434 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2435 struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2436 int n;
2438 pi_mr->ndescs = 0;
2439 pi_mr->meta_ndescs = 0;
2440 pi_mr->meta_length = 0;
2442 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2443 pi_mr->desc_size * pi_mr->max_descs,
2444 DMA_TO_DEVICE);
2446 pi_mr->ibmr.page_size = ibmr->page_size;
2447 n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2448 mlx5_set_page);
2449 if (n != data_sg_nents)
2450 return n;
2452 pi_mr->data_iova = pi_mr->ibmr.iova;
2453 pi_mr->data_length = pi_mr->ibmr.length;
2454 pi_mr->ibmr.length = pi_mr->data_length;
2455 ibmr->length = pi_mr->data_length;
2457 if (meta_sg_nents) {
2458 u64 page_mask = ~((u64)ibmr->page_size - 1);
2459 u64 iova = pi_mr->data_iova;
2461 n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2462 meta_sg_offset, mlx5_set_page_pi);
2464 pi_mr->meta_length = pi_mr->ibmr.length;
2465 /*
2466 * PI address for the HW is the offset of the metadata address
2467 * relative to the first data page address.
2468 * It equals the first data page address + the size of the data pages +
2469 * the metadata offset within the first metadata page.
2470 */
2471 pi_mr->pi_iova = (iova & page_mask) +
2472 pi_mr->ndescs * ibmr->page_size +
2473 (pi_mr->ibmr.iova & ~page_mask);
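/*
 * Worked example with made-up numbers: for a 4 KiB page_size, data at
 * iova 0x10200 spanning three pages (ndescs = 3) and metadata starting
 * at offset 0x80 within its first page, the expression gives
 *
 *	pi_iova = 0x10000 + 3 * 0x1000 + 0x80 = 0x13080
 *
 * i.e. the metadata is addressed just past the registered data pages.
 */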
2474 /*
2475 * In order to use one MTT MR for data and metadata, we also register
2476 * the gaps between the end of the data and the start of the metadata
2477 * (the sig MR will verify that the HW accesses the right addresses).
2478 * This mapping is safe because we use an internal mkey for the
2479 * registration.
2480 */
2481 pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2482 pi_mr->ibmr.iova = iova;
2483 ibmr->length += pi_mr->meta_length;
2484 }
2486 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2487 pi_mr->desc_size * pi_mr->max_descs,
2488 DMA_TO_DEVICE);
2490 return n;
2491 }
2493 static int
2494 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2495 int data_sg_nents, unsigned int *data_sg_offset,
2496 struct scatterlist *meta_sg, int meta_sg_nents,
2497 unsigned int *meta_sg_offset)
2498 {
2499 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2500 struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2501 int n;
2503 pi_mr->ndescs = 0;
2504 pi_mr->meta_ndescs = 0;
2505 pi_mr->meta_length = 0;
2507 ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2508 pi_mr->desc_size * pi_mr->max_descs,
2509 DMA_TO_DEVICE);
2511 n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2512 meta_sg, meta_sg_nents, meta_sg_offset);
2514 ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2515 pi_mr->desc_size * pi_mr->max_descs,
2516 DMA_TO_DEVICE);
2518 /* This is zero-based memory region */
2519 pi_mr->data_iova = 0;
2520 pi_mr->ibmr.iova = 0;
2521 pi_mr->pi_iova = pi_mr->data_length;
2522 ibmr->length = pi_mr->ibmr.length;
2524 return n;
2525 }
2527 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2528 int data_sg_nents, unsigned int *data_sg_offset,
2529 struct scatterlist *meta_sg, int meta_sg_nents,
2530 unsigned int *meta_sg_offset)
2531 {
2532 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2533 struct mlx5_ib_mr *pi_mr = NULL;
2534 int n;
2536 WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2538 mr->ndescs = 0;
2539 mr->data_length = 0;
2540 mr->data_iova = 0;
2541 mr->meta_ndescs = 0;
2542 mr->pi_iova = 0;
2543 /*
2544 * As a performance optimization, if possible, there is no need to
2545 * perform a UMR operation to register the data/metadata buffers.
2546 * First try to map the sg lists to PA descriptors with local_dma_lkey.
2547 * Fall back to UMR only in case of a failure.
2548 */
2549 n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2550 data_sg_offset, meta_sg, meta_sg_nents,
2551 meta_sg_offset);
2552 if (n == data_sg_nents + meta_sg_nents)
2553 goto out;
2554 /*
2555 * As a performance optimization, if possible, there is no need to map
2556 * the sg lists to KLM descriptors. First try to map the sg lists to MTT
2557 * descriptors and fall back to KLM only in case of a failure.
2558 * It's more efficient for the HW to work with MTT descriptors
2559 * (especially under high load).
2560 * Use KLM (indirect access) only if it's mandatory.
2561 */
2562 pi_mr = mr->mtt_mr;
2563 n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2564 data_sg_offset, meta_sg, meta_sg_nents,
2565 meta_sg_offset);
2566 if (n == data_sg_nents + meta_sg_nents)
2567 goto out;
2569 pi_mr = mr->klm_mr;
2570 n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2571 data_sg_offset, meta_sg, meta_sg_nents,
2572 meta_sg_offset);
2573 if (unlikely(n != data_sg_nents + meta_sg_nents))
2574 return -ENOMEM;
2576 out:
2577 /* This is zero-based memory region */
2578 ibmr->iova = 0;
2579 mr->pi_mr = pi_mr;
2580 if (pi_mr)
2581 ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2582 else
2583 ibmr->sig_attrs->meta_length = mr->meta_length;
2585 return 0;
2586 }
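/*
 * Illustrative only (not part of this file): a ULP with DMA-mapped data
 * and protection SG lists would typically drive this path roughly as:
 *
 *	ret = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
 *			      prot_sg, prot_nents, NULL, SZ_4K);
 *	if (ret < 0)
 *		return ret;
 *	... fill mr->sig_attrs (check_mask, mem/wire domains), then post an
 *	... IB_WR_REG_MR_INTEGRITY work request and use mr->lkey/rkey in the
 *	... data transfer.
 *
 * The names data_sg/prot_sg and the 4 KiB page size are assumptions made
 * for the sketch, not requirements of this driver.
 */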
2588 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2589 unsigned int *sg_offset)
2590 {
2591 struct mlx5_ib_mr *mr = to_mmr(ibmr);
2592 int n;
2594 mr->ndescs = 0;
2596 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2597 mr->desc_size * mr->max_descs,
2598 DMA_TO_DEVICE);
2600 if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2601 n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2602 NULL);
2603 else
2604 n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2605 mlx5_set_page);
2607 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2608 mr->desc_size * mr->max_descs,
2609 DMA_TO_DEVICE);
2611 return n;
2612 }
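/*
 * Illustrative only (not part of this file): the classic fast-registration
 * flow that ends up in mlx5_ib_map_mr_sg() looks roughly like this in a
 * ULP (names and sizes are assumptions made for the sketch):
 *
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *	struct ib_reg_wr reg_wr = {};
 *	int n;
 *
 *	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	if (n != nents)
 *		return n < 0 ? n : -EINVAL;
 *
 *	reg_wr.wr.opcode = IB_WR_REG_MR;
 *	reg_wr.mr = mr;
 *	reg_wr.key = mr->rkey;
 *	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
 *	... post reg_wr before the WRs that use mr->rkey / mr->lkey.
 */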