/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
enum {
        MAX_PENDING_REG_MR = 8,
};
static __be64 *mr_align(__be64 *ptr, int align)
{
        unsigned long mask = align - 1;

        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;

        if (order < cache->ent[0].order)
                return 0;
        else
                return order - cache->ent[0].order;
}
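/*
 * Completion callback for asynchronous mkey creation issued by add_keys().
 * On success the new MR gets its mkey and is added to the cache entry's free
 * list; on failure the fill path is throttled via the delay timer.
 */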
static void reg_mr_callback(int status, void *context)
{
        struct mlx5_ib_mr *mr = context;
        struct mlx5_ib_dev *dev = mr->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int c = order2idx(dev, mr->order);
        struct mlx5_cache_ent *ent = &cache->ent[c];
        unsigned long flags;
        u8 key;

        spin_lock_irqsave(&ent->lock, flags);
        ent->pending--;
        spin_unlock_irqrestore(&ent->lock, flags);
        if (status) {
                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        if (mr->out.hdr.status) {
                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
                             mr->out.hdr.status,
                             be32_to_cpu(mr->out.hdr.syndrome));
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
        key = dev->mdev.priv.mkey_key++;
        spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

        cache->last_add = jiffies;

        spin_lock_irqsave(&ent->lock, flags);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        ent->size++;
        spin_unlock_irqrestore(&ent->lock, flags);
}
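/*
 * Asynchronously create 'num' mkeys for cache entry 'c'.  Creation completes
 * in reg_mr_callback(); at most MAX_PENDING_REG_MR requests may be in flight
 * per entry at any time.
 */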
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
        int err = 0;
        int i;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        for (i = 0; i < num; i++) {
                if (ent->pending >= MAX_PENDING_REG_MR) {
                        err = -EAGAIN;
                        break;
                }

                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
                        break;
                }
                mr->order = ent->order;
                mr->umred = 1;
                mr->dev = dev;
                in->seg.status = 1 << 6;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;

                spin_lock_irq(&ent->lock);
                ent->pending++;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
                                            sizeof(*in), reg_mr_callback,
                                            mr, &mr->out);
                if (err) {
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        kfree(mr);
                        break;
                }
        }

        kfree(in);
        return err;
}
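/* Destroy up to 'num' cached mkeys from entry 'c', starting at the list head. */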
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;
        int i;

        for (i = 0; i < num; i++) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}
static ssize_t size_write(struct file *filp, const char __user *buf,
                          size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var < ent->limit)
                return -EINVAL;

        if (var > ent->size) {
                do {
                        err = add_keys(dev, c, var - ent->size);
                        if (err && err != -EAGAIN)
                                return err;

                        usleep_range(3000, 5000);
                } while (err);
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }

        return count;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
                         loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}
static const struct file_operations size_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = size_write,
        .read   = size_read,
};
static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var > ent->size)
                return -EINVAL;

        ent->limit = var;

        if (ent->cur < ent->limit) {
                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
                if (err)
                        return err;
        }

        return count;
}
static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}
static const struct file_operations limit_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = limit_write,
        .read   = limit_read,
};
static int someone_adding(struct mlx5_mr_cache *cache)
{
        int i;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                if (cache->ent[i].cur < cache->ent[i].limit)
                        return 1;
        }

        return 0;
}
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);
        int err;

        if (cache->stopped)
                return;

        ent = &dev->cache.ent[i];
        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
                err = add_keys(dev, i, 1);
                if (ent->cur < 2 * ent->limit) {
                        if (err == -EAGAIN) {
                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
                                            i + 2);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(3));
                        } else if (err) {
                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
                                             i + 2, err);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(1000));
                        } else {
                                queue_work(cache->wq, &ent->work);
                        }
                }
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
                    time_after(jiffies, cache->last_add + 300 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
                }
        }
}
static void delayed_cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
        __cache_work_func(ent);
}
static void cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, work);
        __cache_work_func(ent);
}
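/*
 * Take an MR from the smallest cache entry that can satisfy 'order',
 * falling back to larger entries and kicking the work queue to refill.
 */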
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_ib_mr *mr = NULL;
        struct mlx5_cache_ent *ent;
        int c;
        int i;

        c = order2idx(dev, order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }

        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];

                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

                spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
                        spin_unlock_irq(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
                spin_unlock_irq(&ent->lock);

                queue_work(cache->wq, &ent->work);
        }

        if (!mr)
                cache->ent[c].miss++;

        return mr;
}
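/* Return an MR to its cache entry; schedule a shrink if the entry grows past 2 * limit. */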
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int shrink = 0;
        int c;

        c = order2idx(dev, mr->order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
        ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
        spin_unlock_irq(&ent->lock);

        if (shrink)
                queue_work(cache->wq, &ent->work);
}
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;

        cancel_delayed_work(&ent->dwork);
        while (1) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int i;

        if (!mlx5_debugfs_root)
                return 0;

        cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
        if (!cache->root)
                return -ENOMEM;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                sprintf(ent->name, "%d", ent->order);
                ent->dir = debugfs_create_dir(ent->name, cache->root);
                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
                                                 &size_fops);
                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
                                                  &limit_fops);
                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
                                               &ent->cur);
                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
                                                &ent->miss);
        }

        return 0;
}
static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!mlx5_debugfs_root)
                return;

        debugfs_remove_recursive(dev->cache.root);
}
static void delay_time_func(unsigned long ctx)
{
        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

        dev->fill_delay = 0;
}
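/*
 * Set up the MR cache: one workqueue-backed entry per cached order, with
 * limits taken from the device profile, plus the debugfs interface.
 */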
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
        int err;
        int i;

        cache->wq = create_singlethread_workqueue("mkey_cache");
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
                return -ENOMEM;
        }

        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                INIT_LIST_HEAD(&cache->ent[i].head);
                spin_lock_init(&cache->ent[i].lock);

                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;

                if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
                        limit = dev->mdev.profile->mr_cache[i].limit;
                else
                        limit = 0;

                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
                queue_work(cache->wq, &ent->work);
        }

        err = mlx5_mr_cache_debugfs_init(dev);
        if (err)
                mlx5_ib_warn(dev, "cache debugfs failure\n");

        return 0;
}
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
        int i;

        dev->cache.stopped = 1;
        flush_workqueue(dev->cache.wq);

        mlx5_mr_cache_debugfs_cleanup(dev);

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);

        destroy_workqueue(dev->cache.wq);
        del_timer_sync(&dev->delay_timer);

        return 0;
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_core_dev *mdev = &dev->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        seg = &in->seg;
        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);

        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
                                    NULL);
        if (err)
                goto err_in;

        kfree(in);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

err_in:
        kfree(in);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}
static int get_octo_len(u64 addr, u64 len, int page_size)
{
        u64 offset;
        int npages;

        offset = addr & (page_size - 1);
        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
        return (npages + 1) / 2;
}
static int use_umr(int order)
{
        return order <= 17;
}
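/* Build a UMR work request that registers 'n' pages at 'virt_addr' under 'key'. */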
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             struct ib_sge *sg, u64 dma, int n, u32 key,
                             int page_shift, u64 virt_addr, u64 len,
                             int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct ib_mr *mr = dev->umrc.mr;

        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
        sg->lkey = mr->lkey;

        wr->sg_list = sg;
        wr->num_sge = 1;
        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.page_list_len = n;
        wr->wr.fast_reg.page_shift = page_shift;
        wr->wr.fast_reg.rkey = key;
        wr->wr.fast_reg.iova_start = virt_addr;
        wr->wr.fast_reg.length = len;
        wr->wr.fast_reg.access_flags = access_flags;
        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}
static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
{
        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
        wr->opcode = MLX5_IB_WR_UMR;
        wr->wr.fast_reg.rkey = key;
}
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
        struct mlx5_ib_mr *mr;
        struct ib_wc wc;
        int err;

        while (1) {
                err = ib_poll_cq(cq, 1, &wc);
                if (err < 0) {
                        pr_warn("poll cq error %d\n", err);
                        return;
                }
                if (err == 0)
                        break;

                mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
                mr->status = wc.status;
                complete(&mr->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
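/*
 * Register a user memory region through the UMR QP using an MR taken from
 * the cache; the caller waits for the UMR completion reported by
 * mlx5_umr_cq_handler().
 */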
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int size = sizeof(u64) * npages;
        int err;
        int i;

        for (i = 0; i < 1; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;

                err = add_keys(dev, order2idx(dev, order), 1);
                if (err && err != -EAGAIN) {
                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
                        break;
                }
        }

        if (!mr)
                return ERR_PTR(-EAGAIN);

        mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
        if (!mr->pas) {
                err = -ENOMEM;
                goto error;
        }

        mlx5_ib_populate_pas(dev, umem, page_shift,
                             mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

        mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
                                 DMA_TO_DEVICE);
        if (dma_mapping_error(ddev, mr->dma)) {
                kfree(mr->pas);
                err = -ENOMEM;
                goto error;
        }

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);

        /* We serialize polls so one process does not kidnap another's
         * completion. This is not a problem since wr is completed in
         * around 1 usec
         */
        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                up(&umrc->sem);
                goto error;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);

        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
        kfree(mr->pas);

        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "reg umr failed\n");
                err = -EFAULT;
                goto error;
        }

        return mr;

error:
        free_cached_mr(dev, mr);
        return ERR_PTR(err);
}
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                     u64 length, struct ib_umem *umem,
                                     int npages, int page_shift,
                                     int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int inlen;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

        in->seg.flags = convert_access(access_flags) |
                MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        in->seg.start_addr = cpu_to_be64(virt_addr);
        in->seg.len = cpu_to_be64(length);
        in->seg.bsfs_octo_size = 0;
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                         1 << page_shift));
        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
                                    NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
        mr->umem = umem;
        mlx5_vfree(in);

        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

        return mr;

err_2:
        mlx5_vfree(in);
err_1:
        kfree(mr);
        return ERR_PTR(err);
}
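/*
 * ib_reg_user_mr entry point: pin the user pages, then register them either
 * through the UMR path (cached MRs) or with a direct create_mkey command.
 */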
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
        struct ib_umem *umem;
        int page_shift;
        int npages;
        int ncont;
        int order;
        int err;

        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
                    start, virt_addr, length);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
                mlx5_ib_dbg(dev, "umem get failed\n");
                return (void *)umem;
        }

        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
        if (!npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                err = -EINVAL;
                goto error;
        }

        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    npages, ncont, order, page_shift);

        if (use_umr(order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d", order);
                        mr = NULL;
                }
        }

        if (!mr)
                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
                                access_flags);

        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto error;
        }

        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

        mr->umem = umem;
        mr->npages = npages;
        spin_lock(&dev->mr_lock);
        dev->mdev.priv.reg_pages += npages;
        spin_unlock(&dev->mr_lock);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

error:
        ib_umem_release(umem);
        return ERR_PTR(err);
}
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct umr_common *umrc = &dev->umrc;
        struct ib_send_wr wr, *bad;
        int err;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)mr;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

        down(&umrc->sem);
        init_completion(&mr->done);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                return err;
        }
        wait_for_completion(&mr->done);
        up(&umrc->sem);

        if (mr->status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                return -EFAULT;
        }

        return 0;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct ib_umem *umem = mr->umem;
        int npages = mr->npages;
        int umred = mr->umred;
        int err;

        if (!umred) {
                err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     mr->mmr.key, err);
                        return err;
                }
        } else {
                err = unreg_umr(dev, mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed unregister\n");
                        return err;
                }
                free_cached_mr(dev, mr);
        }

        if (umem) {
                ib_umem_release(umem);
                spin_lock(&dev->mr_lock);
                dev->mdev.priv.reg_pages -= npages;
                spin_unlock(&dev->mr_lock);
        }

        if (!umred)
                kfree(mr);

        return 0;
}
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = 1 << 6; /* free */
        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        /* TBD not needed - issue 197292 */
        in->seg.log2_page_size = PAGE_SHIFT;

        err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
                                    NULL, NULL);
        kfree(in);
        if (err)
                goto err_free;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

err_free:
        kfree(mr);
        return ERR_PTR(err);
}
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl;
        int size = page_list_len * sizeof(u64);

        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
        if (!mfrpl)
                return ERR_PTR(-ENOMEM);

        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
        if (!mfrpl->ibfrpl.page_list)
                goto err_free;

        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
                                                     size, &mfrpl->map,
                                                     GFP_KERNEL);
        if (!mfrpl->mapped_page_list)
                goto err_free;

        WARN_ON(mfrpl->map & 0x3f);

        return &mfrpl->ibfrpl;

err_free:
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
        return ERR_PTR(-ENOMEM);
}
void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
        int size = page_list->max_page_list_len * sizeof(u64);

        dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
                          mfrpl->map);
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
}