/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <rdma/ib_umem_odp.h>

#include "uverbs.h"

static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
	struct sg_page_iter sg_iter;
	struct page *page;

	if (umem->nmap > 0)
		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
				DMA_BIDIRECTIONAL);

	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
		page = sg_page_iter_page(&sg_iter);
		unpin_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
	}

	sg_free_table(&umem->sg_head);
}

/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
 *
 * sg: current scatterlist entry
 * page_list: array of npages struct page pointers
 * npages: number of pages in page_list
 * max_seg_sz: maximum segment size in bytes
 * nents: [out] number of entries in the scatterlist
 *
 * Return new end of scatterlist
 */
static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
						struct page **page_list,
						unsigned long npages,
						unsigned int max_seg_sz,
						int *nents)
{
	unsigned long first_pfn;
	unsigned long i = 0;
	bool update_cur_sg = false;
	bool first = !sg_page(sg);

	/* Check if new page_list is contiguous with end of previous page_list.
	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
	 */
	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
		       page_to_pfn(page_list[0])))
		update_cur_sg = true;

	while (i != npages) {
		unsigned long len;
		struct page *first_page = page_list[i];

		first_pfn = page_to_pfn(first_page);

		/* Compute the number of contiguous pages we have starting
		 * at i
		 */
		for (len = 0; i != npages &&
			      first_pfn + len == page_to_pfn(page_list[i]) &&
			      len < (max_seg_sz >> PAGE_SHIFT);
		     len++)
			i++;

		/* Squash N contiguous pages from page_list into current sge */
		if (update_cur_sg) {
			if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
				sg_set_page(sg, sg_page(sg),
					    sg->length + (len << PAGE_SHIFT),
					    0);
				update_cur_sg = false;
				continue;
			}
			update_cur_sg = false;
		}

		/* Squash N contiguous pages into next sge or first sge */
		if (!first)
			sg = sg_next(sg);

		(*nents)++;
		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
		first = false;
	}

	return sg;
}
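
/*
 * Usage sketch (hypothetical caller; in this file the only real caller is
 * ib_umem_get()): how a caller grows the scatter table across batches of
 * pinned pages.  The batch size and the local variables are assumptions
 * for illustration only.
 *
 *	struct scatterlist *sg = umem->sg_head.sgl;
 *	int nents = 0;
 *
 *	// First batch: physically contiguous runs of pages are squashed
 *	// into as few sges as max_seg_sz allows.
 *	sg = ib_umem_add_sg_table(sg, page_list, 512,
 *				  dma_get_max_seg_size(device->dma_device),
 *				  &nents);
 *	// A later batch whose first pfn directly follows the last sge
 *	// extends that sge instead of starting a new one.
 *	sg = ib_umem_add_sg_table(sg, page_list, 512,
 *				  dma_get_max_seg_size(device->dma_device),
 *				  &nents);
 *	sg_mark_end(sg);
 */
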
/**
 * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
 *
 * @umem: umem struct
 * @pgsz_bitmap: bitmap of HW supported page sizes
 * @virt: IOVA
 *
 * This helper is intended for HW that supports multiple page
 * sizes but can do only a single page size in an MR.
 *
 * Returns 0 if the umem requires page sizes not supported by
 * the driver to be mapped. Drivers always supporting PAGE_SIZE
 * or smaller will never see a 0 result.
 */
unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
				     unsigned long pgsz_bitmap,
				     unsigned long virt)
{
	struct scatterlist *sg;
	unsigned int best_pg_bit;
	unsigned long va, pgoff;
	dma_addr_t mask;
	int i;

	/* At minimum, drivers must support PAGE_SIZE or smaller */
	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
		return 0;

	va = virt;
	/* max page size not to exceed MR length */
	mask = roundup_pow_of_two(umem->length);
	/* offset into first SGL */
	pgoff = umem->address & ~PAGE_MASK;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		/* Walk SGL and reduce max page size if VA/PA bits differ
		 * for any address.
		 */
		mask |= (sg_dma_address(sg) + pgoff) ^ va;
		va += sg_dma_len(sg) - pgoff;
		/* Except for the last entry, the ending iova alignment sets
		 * the maximum possible page size as the low bits of the iova
		 * must be zero when starting the next chunk.
		 */
		if (i != (umem->nmap - 1))
			mask |= va;
		pgoff = 0;
	}

	best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);

	return BIT_ULL(best_pg_bit);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
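
/*
 * Usage sketch (hypothetical driver): a device that can build MRs from 4K,
 * 2M or 1G pages would call this from its reg_mr path.  The page-size
 * bitmap and the mr->page_shift field below are assumptions for
 * illustration, not part of this API.
 *
 *	unsigned long pgsz;
 *
 *	pgsz = ib_umem_find_best_pgsz(umem, SZ_4K | SZ_2M | SZ_1G, iova);
 *	if (!pgsz)
 *		return -EINVAL;	// no supported page size fits this umem
 *	mr->page_shift = order_base_2(pgsz);
 */
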
/**
 * ib_umem_get - Pin and DMA map userspace memory.
 *
 * @device: IB device to connect UMEM
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 */
struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
			    size_t size, int access)
{
	struct ib_umem *umem;
	struct page **page_list;
	unsigned long lock_limit;
	unsigned long new_pinned;
	unsigned long cur_base;
	unsigned long dma_attr = 0;
	struct mm_struct *mm;
	unsigned long npages;
	int ret;
	struct scatterlist *sg;
	unsigned int gup_flags = FOLL_WRITE;

	/*
	 * If the combination of the addr and size requested for this memory
	 * region causes an integer overflow, return error.
	 */
	if (((addr + size) < addr) ||
	    PAGE_ALIGN(addr + size) < (addr + size))
		return ERR_PTR(-EINVAL);

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	if (access & IB_ACCESS_ON_DEMAND)
		return ERR_PTR(-EOPNOTSUPP);

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);
	umem->ibdev      = device;
	umem->length     = size;
	umem->address    = addr;
	umem->writable   = ib_access_writable(access);
	umem->owning_mm = mm = current->mm;
	mmgrab(mm);

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list) {
		ret = -ENOMEM;
		goto umem_kfree;
	}

	npages = ib_umem_num_pages(umem);
	if (npages == 0 || npages > UINT_MAX) {
		ret = -EINVAL;
		goto out;
	}

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
		atomic64_sub(npages, &mm->pinned_vm);
		ret = -ENOMEM;
		goto out;
	}

	cur_base = addr & PAGE_MASK;

	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
	if (ret)
		goto vma;

	if (!umem->writable)
		gup_flags |= FOLL_FORCE;

	sg = umem->sg_head.sgl;

	while (npages) {
		ret = pin_user_pages_fast(cur_base,
					  min_t(unsigned long, npages,
						PAGE_SIZE /
						sizeof(struct page *)),
					  gup_flags | FOLL_LONGTERM, page_list);
		if (ret < 0)
			goto umem_release;

		cur_base += ret * PAGE_SIZE;
		npages -= ret;

		sg = ib_umem_add_sg_table(sg, page_list, ret,
			dma_get_max_seg_size(device->dma_device),
			&umem->sg_nents);
	}

	sg_mark_end(sg);

	if (access & IB_ACCESS_RELAXED_ORDERING)
		dma_attr |= DMA_ATTR_WEAK_ORDERING;

	umem->nmap =
		ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents,
				    DMA_BIDIRECTIONAL, dma_attr);

	if (!umem->nmap) {
		ret = -ENOMEM;
		goto umem_release;
	}

	ret = 0;
	goto out;

umem_release:
	__ib_umem_release(device, umem, 0);
vma:
	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
	free_page((unsigned long) page_list);
umem_kfree:
	if (ret) {
		mmdrop(umem->owning_mm);
		kfree(umem);
	}
	return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
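
/*
 * Usage sketch (hypothetical reg_user_mr callback): ib_umem_get() is paired
 * with ib_umem_release() on dereg or on an error path.  The surrounding mr
 * object and its programming step are assumptions for illustration.
 *
 *	struct ib_umem *umem;
 *
 *	umem = ib_umem_get(pd->device, start, length, access_flags);
 *	if (IS_ERR(umem))
 *		return ERR_CAST(umem);
 *
 *	// ... program HW translation entries from umem->sg_head.sgl ...
 *
 *	// On teardown the DMA mapping is undone and the pages are unpinned:
 *	ib_umem_release(umem);
 */
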
/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
	if (!umem)
		return;
	if (umem->is_odp)
		return ib_umem_odp_release(to_ib_umem_odp(umem));

	__ib_umem_release(umem->ibdev, umem, 1);

	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
	mmdrop(umem->owning_mm);
	kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);

int ib_umem_page_count(struct ib_umem *umem)
{
	int i, n = 0;
	struct scatterlist *sg;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
		n += sg_dma_len(sg) >> PAGE_SHIFT;

	return n;
}
EXPORT_SYMBOL(ib_umem_page_count);

/*
 * Copy from the given ib_umem's pages to the given buffer.
 *
 * umem - the umem to copy from
 * offset - offset to start copying from
 * dst - destination buffer
 * length - buffer length
 *
 * Returns 0 on success, or an error code.
 */
int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
		      size_t length)
{
	size_t end = offset + length;
	int ret;

	if (offset > umem->length || length > umem->length - offset) {
		pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
		       offset, umem->length, end);
		return -EINVAL;
	}

	ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
				 offset + ib_umem_offset(umem));

	if (ret < 0)
		return ret;
	else if (ret != length)
		return -EINVAL;
	else
		return 0;
}
EXPORT_SYMBOL(ib_umem_copy_from);
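
/*
 * Usage sketch (hypothetical caller): copy the first bytes of a registered
 * region into a kernel buffer, e.g. to inspect a header laid down by
 * userspace.  The buffer size is an arbitrary assumption.
 *
 *	u8 hdr[64];
 *	int err;
 *
 *	err = ib_umem_copy_from(hdr, umem, 0, sizeof(hdr));
 *	if (err)
 *		return err;
 */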