/*
 * Copyright (c) 2005 Topspin Communications. All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"
/* Unpin and release the pages backing a umem, DMA unmapping them first if
 * they were mapped to the device.
 */
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
        struct sg_page_iter sg_iter;
        struct page *page;

        if (umem->nmap > 0)
                ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
                                DMA_BIDIRECTIONAL);

        for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
                page = sg_page_iter_page(&sg_iter);
                unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
        }

        sg_free_table(&umem->sg_head);
}
/**
 * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
 *
 * @umem: umem struct
 * @pgsz_bitmap: bitmap of HW supported page sizes
 * @virt: IOVA
 *
 * This helper is intended for HW that supports multiple page
 * sizes but can do only a single page size in an MR.
 *
 * Returns 0 if the umem requires page sizes not supported by
 * the driver to be mapped. Drivers always supporting PAGE_SIZE
 * or smaller will never see a 0 result.
 */
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
                                     unsigned long pgsz_bitmap,
                                     unsigned long virt)
{
        struct scatterlist *sg;
        unsigned long va, pgoff;
        dma_addr_t mask;
        int i;

        if (umem->is_odp) {
                unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);

                /* ODP must always be self consistent. */
                if (!(pgsz_bitmap & page_size))
                        return 0;
                return page_size;
        }

        /* rdma_for_each_block() has a bug if the page size is smaller than the
         * page size used to build the umem. For now prevent smaller page sizes
         * from being returned.
         */
        pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);

        /* At minimum, drivers must support PAGE_SIZE or smaller */
        if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
                return 0;

        umem->iova = va = virt;
        /* The best result is the smallest page size that results in the minimum
         * number of required pages. Compute the largest page size that could
         * work based on VA address bits that don't change.
         */
        mask = pgsz_bitmap &
               GENMASK(BITS_PER_LONG - 1,
                       bits_per((umem->length - 1 + virt) ^ virt));
        /* offset into first SGL */
        pgoff = umem->address & ~PAGE_MASK;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                /* Walk SGL and reduce max page size if VA/PA bits differ
                 * for any address.
                 */
                mask |= (sg_dma_address(sg) + pgoff) ^ va;
                va += sg_dma_len(sg) - pgoff;
                /* Except for the last entry, the ending iova alignment sets
                 * the maximum possible page size as the low bits of the iova
                 * must be zero when starting the next chunk.
                 */
                if (i != (umem->nmap - 1))
                        mask |= va;
                pgoff = 0;
        }

        /* The mask accumulates 1's in each position where the VA and physical
         * address differ, thus the length of trailing 0 is the largest page
         * size that can pass the VA through to the physical.
         */
        if (mask)
                pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
        return rounddown_pow_of_two(pgsz_bitmap);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
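
/*
 * Usage sketch (illustrative only, not part of the upstream file): a driver
 * whose HW can back an MR with 4K, 2M or 1G pages might choose the page size
 * for a freshly mapped umem like this. The SZ_* bitmap, "iova" and
 * "mr->page_shift" below are assumptions made purely for the example.
 *
 *      unsigned long pgsz;
 *
 *      pgsz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M | SZ_1G, iova);
 *      if (!pgsz)
 *              return -EINVAL;
 *      mr->page_shift = order_base_2(pgsz);
 *
 * A zero result means the buffer layout cannot be expressed with any of the
 * offered page sizes; as documented above, bitmaps that include PAGE_SIZE or
 * smaller never hit this case.
 */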
/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * @device: IB device to connect UMEM
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 */
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
                            size_t size, int access)
{
        struct ib_umem *umem;
        struct page **page_list;
        unsigned long lock_limit;
        unsigned long new_pinned;
        unsigned long cur_base;
        unsigned long dma_attr = 0;
        struct mm_struct *mm;
        unsigned long npages;
        int ret;
        struct scatterlist *sg = NULL;
        unsigned int gup_flags = FOLL_WRITE;

        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
        if (((addr + size) < addr) ||
            PAGE_ALIGN(addr + size) < (addr + size))
                return ERR_PTR(-EINVAL);

        if (!can_do_mlock())
                return ERR_PTR(-EPERM);

        if (access & IB_ACCESS_ON_DEMAND)
                return ERR_PTR(-EOPNOTSUPP);

        umem = kzalloc(sizeof(*umem), GFP_KERNEL);
        if (!umem)
                return ERR_PTR(-ENOMEM);
        umem->ibdev    = device;
        umem->length   = size;
        umem->address  = addr;
        /*
         * Drivers should call ib_umem_find_best_pgsz() to set the iova
         * correctly.
         */
        umem->iova     = addr;
        umem->writable = ib_access_writable(access);
        umem->owning_mm = mm = current->mm;
        mmgrab(mm);

        page_list = (struct page **) __get_free_page(GFP_KERNEL);
        if (!page_list) {
                ret = -ENOMEM;
                goto umem_kfree;
        }

        npages = ib_umem_num_pages(umem);
        if (npages == 0 || npages > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }

        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

        new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
        if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
                atomic64_sub(npages, &mm->pinned_vm);
                ret = -ENOMEM;
                goto out;
        }

        cur_base = addr & PAGE_MASK;

        if (!umem->writable)
                gup_flags |= FOLL_FORCE;

        while (npages) {
                cond_resched();
                ret = pin_user_pages_fast(cur_base,
                                          min_t(unsigned long, npages,
                                                PAGE_SIZE /
                                                sizeof(struct page *)),
                                          gup_flags | FOLL_LONGTERM, page_list);
                if (ret < 0)
                        goto umem_release;

                cur_base += ret * PAGE_SIZE;
                npages -= ret;
                sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret,
                                0, ret << PAGE_SHIFT,
                                ib_dma_max_seg_size(device), sg, npages,
                                GFP_KERNEL);
                umem->sg_nents = umem->sg_head.nents;
                if (IS_ERR(sg)) {
                        unpin_user_pages_dirty_lock(page_list, ret, 0);
                        ret = PTR_ERR(sg);
                        goto umem_release;
                }
        }

        if (access & IB_ACCESS_RELAXED_ORDERING)
                dma_attr |= DMA_ATTR_WEAK_ORDERING;

        umem->nmap =
                ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents,
                                    DMA_BIDIRECTIONAL, dma_attr);

        if (!umem->nmap) {
                ret = -ENOMEM;
                goto umem_release;
        }

        ret = 0;
        goto out;

umem_release:
        __ib_umem_release(device, umem, 0);
        atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
        free_page((unsigned long) page_list);
umem_kfree:
        if (ret) {
                mmdrop(umem->owning_mm);
                kfree(umem);
        }
        return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
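
/*
 * Usage sketch (illustrative only, not part of the upstream file): the usual
 * pattern in a driver's reg_user_mr path is to pin the buffer first and
 * release it on any later failure. "pd", "start", "length" and "access_flags"
 * follow the reg_user_mr() verb arguments; the error label and "ret" are
 * assumptions for the example.
 *
 *      struct ib_umem *umem;
 *
 *      umem = ib_umem_get(pd->device, start, length, access_flags);
 *      if (IS_ERR(umem))
 *              return ERR_CAST(umem);
 *      ...
 * err_release:
 *      ib_umem_release(umem);
 *      return ERR_PTR(ret);
 */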
/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
        if (!umem)
                return;
        if (umem->is_odp)
                return ib_umem_odp_release(to_ib_umem_odp(umem));

        __ib_umem_release(umem->ibdev, umem, 1);

        atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
        mmdrop(umem->owning_mm);
        kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
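
/*
 * Usage sketch (illustrative only, not part of the upstream file):
 * ib_umem_release() is the teardown pair of ib_umem_get() and accepts a NULL
 * umem, so a driver's dereg path can call it unconditionally once the HW can
 * no longer access the pages. "mr" is an assumption for the example.
 *
 *      ib_umem_release(mr->umem);
 *      mr->umem = NULL;
 */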
/*
 * Copy from the given ib_umem's pages to the given buffer.
 *
 * umem - the umem to copy from
 * offset - offset to start copying from
 * dst - destination buffer
 * length - buffer length
 *
 * Returns 0 on success, or an error code.
 */
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
                      size_t length)
{
        size_t end = offset + length;
        int ret;

        if (offset > umem->length || length > umem->length - offset) {
                pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
                       offset, umem->length, end);
                return -EINVAL;
        }

        ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
                                 offset + ib_umem_offset(umem));

        if (ret < 0)
                return ret;
        else if (ret != length)
                return -EINVAL;
        else
                return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
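
/*
 * Usage sketch (illustrative only, not part of the upstream file): pulling a
 * small, driver-defined header out of a pinned region, e.g. to validate
 * user-built metadata before programming HW. The 64-byte header size is an
 * assumption for the example.
 *
 *      u8 hdr[64];
 *      int ret;
 *
 *      if (umem->length < sizeof(hdr))
 *              return -EINVAL;
 *      ret = ib_umem_copy_from(hdr, umem, 0, sizeof(hdr));
 *      if (ret)
 *              return ret;
 */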