// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"
/**
 * rdma_umap_priv_init() - Initialize the private data of a vma
 *
 * @priv: The already allocated private data
 * @vma: The vm area struct that needs private data
 * @entry: entry into the mmap_xa that needs to be linked with
 *         this vma
 *
 * Each time we map IO memory into user space, this keeps track of the
 * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
 * to point to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug, as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma,
			 struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	if (entry) {
		kref_get(&entry->ref);
		priv->entry = entry;
	}
	vma->vm_private_data = priv;
	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);
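
/*
 * For reference, disassociation walks the umaps list built above and zaps
 * every tracked vma, roughly like the simplified, illustrative sketch below
 * (locking and mm handling omitted; see the uverbs core for the real thing):
 *
 *	list_for_each_entry_safe(priv, next_priv, &ufile->umaps, list) {
 *		struct vm_area_struct *vma = priv->vma;
 *
 *		zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 *	}
 *
 * After the zap, faults on the mapping are satisfied from the zero page, so
 * userspace keeps running even though the BAR memory is gone.
 */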
/**
 * rdma_user_mmap_io() - Map IO memory into a process
 *
 * @ucontext: associated user context
 * @vma: the vma related to the current mmap call
 * @pfn: pfn to map
 * @size: size to map
 * @prot: pgprot to use in remap call
 * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
 *         if mmap_entry is not used by the driver
 *
 * This is to be called by drivers as part of their mmap() functions if they
 * wish to send something like PCI-E BAR memory to userspace.
 *
 * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
 * success.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
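
/*
 * Example (an illustrative sketch, not part of this file): a driver that
 * exposes a single doorbell page could build its mmap hook directly on
 * rdma_user_mmap_io(). "my_drv_ucontext", "to_my_uctx()" and "db_paddr" are
 * hypothetical driver-side names:
 *
 *	static int my_drv_mmap(struct ib_ucontext *ibuctx,
 *			       struct vm_area_struct *vma)
 *	{
 *		struct my_drv_ucontext *uctx = to_my_uctx(ibuctx);
 *
 *		return rdma_user_mmap_io(ibuctx, vma,
 *					 uctx->db_paddr >> PAGE_SHIFT,
 *					 PAGE_SIZE,
 *					 pgprot_noncached(vma->vm_page_prot),
 *					 NULL);
 *	}
 */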
/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return a reference to the entry if it exists, or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If refcount is zero, entry is already being deleted, driver_removed
	 * indicates that no further mmaps are possible and we are waiting for
	 * the active VMAs to be closed.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
		  pgoff, entry->npages);

	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}

	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);
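
/*
 * Example (an illustrative sketch, not part of this file): a typical mmap
 * hook for a driver that uses the entry database looks up the entry for the
 * offset, maps it, and drops its lookup reference. "my_drv_entry" and
 * "to_my_entry()" are hypothetical driver-side names:
 *
 *	static int my_drv_mmap(struct ib_ucontext *ibuctx,
 *			       struct vm_area_struct *vma)
 *	{
 *		struct rdma_user_mmap_entry *rentry;
 *		struct my_drv_entry *entry;
 *		int err;
 *
 *		rentry = rdma_user_mmap_entry_get(ibuctx, vma);
 *		if (!rentry)
 *			return -EINVAL;
 *
 *		entry = to_my_entry(rentry);
 *		err = rdma_user_mmap_io(ibuctx, vma, entry->pfn,
 *					rentry->npages * PAGE_SIZE,
 *					pgprot_noncached(vma->vm_page_prot),
 *					rentry);
 *
 *		rdma_user_mmap_entry_put(rentry);
 *		return err;
 *	}
 *
 * Passing rentry into rdma_user_mmap_io() lets the umap priv hold its own
 * reference for the lifetime of the vma.
 */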
static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all entries occupied by this single entry, this is deferred
	 * until all VMAs are closed so that the mmap offsets remain unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
		  entry->start_pgoff, entry->npages);

	if (ucontext->device->ops.mmap_free)
		ucontext->device->ops.mmap_free(entry);
}
/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping, or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get was called
 * and entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
 */
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);
/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmmapable
 *
 * @entry: the entry to insert into the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
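
/*
 * Example (an illustrative sketch, not part of this file): when a driver
 * destroys the object backing a mapping, it removes the entry; live user
 * mmaps keep the backing alive until they are unmapped. "my_drv_entry" is a
 * hypothetical wrapper that embeds a struct rdma_user_mmap_entry:
 *
 *	static void my_drv_destroy_db(struct my_drv_entry *entry)
 *	{
 *		rdma_user_mmap_entry_remove(&entry->rdma_entry);
 *	}
 *
 * No new mmaps of the entry are possible after the call; the memory itself
 * is released from ops.mmap_free() once the last user mmap goes away.
 */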
/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *					 in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core, and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in a given range
 * that should be provided to the user; the user will use the offset to
 * retrieve information such as the address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;
	while (true) {
		/* First find an empty index */
		xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
		if (xas.xa_node == XAS_RESTART)
			goto err_unlock;

		xa_first = xas.xa_index;

		/* Is there enough room to have the range? */
		if (check_add_overflow(xa_first, npages, &xa_last))
			goto err_unlock;

		/*
		 * Now look for the next present entry. If an entry doesn't
		 * exist, we found an empty range and can proceed.
		 */
		xas_next_entry(&xas, xa_last - 1);
		if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
			break;
	}

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);

	ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
		  entry->start_pgoff, npages);

	return 0;

err_undo:
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
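
/*
 * Example (an illustrative sketch, not part of this file): a driver can pin
 * a class of mappings to a dedicated pgoff window, e.g. reserving a fixed
 * page offset for a "command" page. "MY_DRV_CMD_PGOFF" is a hypothetical
 * constant:
 *
 *	err = rdma_user_mmap_entry_insert_range(ibuctx, &entry->rdma_entry,
 *						PAGE_SIZE, MY_DRV_CMD_PGOFF,
 *						MY_DRV_CMD_PGOFF);
 *
 * With min_pgoff == max_pgoff the entry lands at exactly that offset, or the
 * call fails with -ENOMEM if the slot is already taken.
 */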
/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core, and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given offset.
 * The function allocates a unique page offset that should be provided to the
 * user; the user will use the offset to retrieve information such as the
 * address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
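
/*
 * Example (an illustrative sketch, not part of this file): the usual flow is
 * to embed a struct rdma_user_mmap_entry in a driver object, insert it, and
 * hand the resulting byte offset back to userspace, which later passes it to
 * mmap(). "resp" is a hypothetical driver response struct:
 *
 *	err = rdma_user_mmap_entry_insert(ibuctx, &entry->rdma_entry, length);
 *	if (err)
 *		return err;
 *	resp->mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
 *
 * rdma_user_mmap_get_offset() converts start_pgoff back to the byte offset
 * that mmap() expects, so drivers never expose raw pgoff values.
 */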