1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2022, Oracle and/or its affiliates.
4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
6 #include <linux/highmem.h>
7 #include <linux/iova_bitmap.h>
9 #include <linux/slab.h>
/* Number of bitmap bits that fit in one page worth of bitmap data */
#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
/**
 * struct iova_bitmap_map - A bitmap representing an IOVA range
 *
 * Main data structure for tracking mapped user pages of bitmap data.
 *
 * For example, for something recording dirty IOVAs, it will be provided a
 * struct iova_bitmap structure, as a general structure for iterating the
 * total IOVA range. The struct iova_bitmap_map, though, represents the
 * subset of said IOVA space that is pinned by its parent structure (struct
 * iova_bitmap).
 *
 * The user does not need to know the exact location of the bits in the
 * bitmap. From the user's perspective the only API available is
 * iova_bitmap_set(), which records the IOVA *range* in the bitmap by
 * setting the corresponding bits.
 *
 * The bitmap is an array of u64 where each bit represents an IOVA range of
 * size (1 << pgshift). Thus the formula for the bitmap data to be set is:
 *
 *   data[(iova / page_size) / 64] |= 1ULL << ((iova / page_size) % 64)
 */
struct iova_bitmap_map {
	/* base IOVA representing bit 0 of the first page */
	unsigned long iova;

	/* length of the IOVA range covered by the pinned pages */
	unsigned long length;

	/* page size order that each bit granules to */
	unsigned long pgshift;

	/* page offset of the first user page pinned */
	unsigned long pgoff;

	/* number of pages pinned */
	unsigned long npages;

	/* pinned pages representing the bitmap data */
	struct page **pages;
};
55 * struct iova_bitmap - The IOVA bitmap object
57 * Main data structure for iterating over the bitmap data.
59 * Abstracts the pinning work and iterates in IOVA ranges.
60 * It uses a windowing scheme and pins the bitmap in relatively
63 * The bitmap object uses one base page to store all the pinned pages
64 * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores
65 * 512 struct page pointers which, if the base page size is 4K, it means
66 * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
67 * also 4K then the range window to iterate is 64G.
69 * For example iterating on a total IOVA range of 4G..128G, it will walk
70 * through this set of ranges:
75 * An example of the APIs on how to use/iterate over the IOVA bitmap:
77 * bitmap = iova_bitmap_alloc(iova, length, page_size, data);
79 * return PTR_ERR(bitmap);
81 * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
83 * iova_bitmap_free(bitmap);
85 * Each iteration of the @dirty_reporter_fn is called with a unique @iova
86 * and @length argument, indicating the current range available through the
87 * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
88 * areas (@iova_length) within that provided range, as following:
90 * iova_bitmap_set(bitmap, iova, iova_length);
92 * The internals of the object uses an index @mapped_base_index that indexes
93 * which u64 word of the bitmap is mapped, up to @mapped_total_index.
94 * Those keep being incremented until @mapped_total_index is reached while
95 * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages.
97 * The IOVA bitmap is usually located on what tracks DMA mapped ranges or
98 * some form of IOVA range tracking that co-relates to the user passed
102 /* IOVA range representing the currently mapped bitmap data */
103 struct iova_bitmap_map mapped
;
105 /* userspace address of the bitmap */
108 /* u64 index that @mapped points to */
109 unsigned long mapped_base_index
;
111 /* how many u64 can we walk in total */
112 unsigned long mapped_total_index
;
114 /* base IOVA of the whole bitmap */
117 /* length of the IOVA range for the whole bitmap */
122 * Converts a relative IOVA to a bitmap index.
123 * This function provides the index into the u64 array (bitmap::bitmap)
124 * for a given IOVA offset.
125 * Relative IOVA means relative to the bitmap::mapped base IOVA
126 * (stored in mapped::iova). All computations in this file are done using
127 * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
128 * The user API iova_bitmap_set() always uses a regular absolute IOVAs.
130 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap
*bitmap
,
133 unsigned long pgsize
= 1 << bitmap
->mapped
.pgshift
;
135 return iova
/ (BITS_PER_TYPE(*bitmap
->bitmap
) * pgsize
);
139 * Converts a bitmap index to a *relative* IOVA.
141 static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap
*bitmap
,
144 unsigned long pgshift
= bitmap
->mapped
.pgshift
;
146 return (index
* BITS_PER_TYPE(*bitmap
->bitmap
)) << pgshift
;
150 * Returns the base IOVA of the mapped range.
152 static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap
*bitmap
)
154 unsigned long skip
= bitmap
->mapped_base_index
;
156 return bitmap
->iova
+ iova_bitmap_index_to_offset(bitmap
, skip
);
/* Forward declaration: defined further down, but used by iova_bitmap_get(). */
static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap);
162 * Pins the bitmap user pages for the current range window.
163 * This is internal to IOVA bitmap and called when advancing the
164 * index (@mapped_base_index) or allocating the bitmap.
166 static int iova_bitmap_get(struct iova_bitmap
*bitmap
)
168 struct iova_bitmap_map
*mapped
= &bitmap
->mapped
;
169 unsigned long npages
;
174 * @mapped_base_index is the index of the currently mapped u64 words
175 * that we have access. Anything before @mapped_base_index is not
176 * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
177 * mapped but capped at a maximum number of pages.
179 npages
= DIV_ROUND_UP((bitmap
->mapped_total_index
-
180 bitmap
->mapped_base_index
) *
181 sizeof(*bitmap
->bitmap
), PAGE_SIZE
);
184 * Bitmap address to be pinned is calculated via pointer arithmetic
185 * with bitmap u64 word index.
187 addr
= bitmap
->bitmap
+ bitmap
->mapped_base_index
;
190 * We always cap at max number of 'struct page' a base page can fit.
191 * This is, for example, on x86 means 2M of bitmap data max.
193 npages
= min(npages
+ !!offset_in_page(addr
),
194 PAGE_SIZE
/ sizeof(struct page
*));
196 ret
= pin_user_pages_fast((unsigned long)addr
, npages
,
197 FOLL_WRITE
, mapped
->pages
);
201 mapped
->npages
= (unsigned long)ret
;
202 /* Base IOVA where @pages point to i.e. bit 0 of the first page */
203 mapped
->iova
= iova_bitmap_mapped_iova(bitmap
);
206 * offset of the page where pinned pages bit 0 is located.
207 * This handles the case where the bitmap is not PAGE_SIZE
210 mapped
->pgoff
= offset_in_page(addr
);
211 mapped
->length
= iova_bitmap_mapped_length(bitmap
);
216 * Unpins the bitmap user pages and clears @npages
217 * (un)pinning is abstracted from API user and it's done when advancing
218 * the index or freeing the bitmap.
220 static void iova_bitmap_put(struct iova_bitmap
*bitmap
)
222 struct iova_bitmap_map
*mapped
= &bitmap
->mapped
;
224 if (mapped
->npages
) {
225 unpin_user_pages(mapped
->pages
, mapped
->npages
);
231 * iova_bitmap_alloc() - Allocates an IOVA bitmap object
232 * @iova: Start address of the IOVA range
233 * @length: Length of the IOVA range
234 * @page_size: Page size of the IOVA bitmap. It defines what each bit
235 * granularity represents
236 * @data: Userspace address of the bitmap
238 * Allocates an IOVA object and initializes all its fields including the
239 * first user pages of @data.
241 * Return: A pointer to a newly allocated struct iova_bitmap
242 * or ERR_PTR() on error.
244 struct iova_bitmap
*iova_bitmap_alloc(unsigned long iova
, size_t length
,
245 unsigned long page_size
, u64 __user
*data
)
247 struct iova_bitmap_map
*mapped
;
248 struct iova_bitmap
*bitmap
;
251 bitmap
= kzalloc(sizeof(*bitmap
), GFP_KERNEL
);
253 return ERR_PTR(-ENOMEM
);
255 mapped
= &bitmap
->mapped
;
256 mapped
->pgshift
= __ffs(page_size
);
257 bitmap
->bitmap
= (u8 __user
*)data
;
258 bitmap
->mapped_total_index
=
259 iova_bitmap_offset_to_index(bitmap
, length
- 1) + 1;
261 bitmap
->length
= length
;
263 mapped
->pages
= (struct page
**)__get_free_page(GFP_KERNEL
);
264 if (!mapped
->pages
) {
272 iova_bitmap_free(bitmap
);
275 EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc
, "IOMMUFD");
278 * iova_bitmap_free() - Frees an IOVA bitmap object
279 * @bitmap: IOVA bitmap to free
281 * It unpins and releases pages array memory and clears any leftover
284 void iova_bitmap_free(struct iova_bitmap
*bitmap
)
286 struct iova_bitmap_map
*mapped
= &bitmap
->mapped
;
288 iova_bitmap_put(bitmap
);
291 free_page((unsigned long)mapped
->pages
);
292 mapped
->pages
= NULL
;
297 EXPORT_SYMBOL_NS_GPL(iova_bitmap_free
, "IOMMUFD");
300 * Returns the remaining bitmap indexes from mapped_total_index to process for
301 * the currently pinned bitmap pages.
303 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap
*bitmap
)
305 unsigned long remaining
, bytes
;
307 bytes
= (bitmap
->mapped
.npages
<< PAGE_SHIFT
) - bitmap
->mapped
.pgoff
;
309 remaining
= bitmap
->mapped_total_index
- bitmap
->mapped_base_index
;
310 remaining
= min_t(unsigned long, remaining
,
311 DIV_ROUND_UP(bytes
, sizeof(*bitmap
->bitmap
)));
317 * Returns the length of the mapped IOVA range.
319 static unsigned long iova_bitmap_mapped_length(struct iova_bitmap
*bitmap
)
321 unsigned long max_iova
= bitmap
->iova
+ bitmap
->length
- 1;
322 unsigned long iova
= iova_bitmap_mapped_iova(bitmap
);
323 unsigned long remaining
;
326 * iova_bitmap_mapped_remaining() returns a number of indexes which
327 * when converted to IOVA gives us a max length that the bitmap
328 * pinned data can cover. Afterwards, that is capped to
329 * only cover the IOVA range in @bitmap::iova .. @bitmap::length.
331 remaining
= iova_bitmap_index_to_offset(bitmap
,
332 iova_bitmap_mapped_remaining(bitmap
));
334 if (iova
+ remaining
- 1 > max_iova
)
335 remaining
-= ((iova
+ remaining
- 1) - max_iova
);
341 * Returns true if [@iova..@iova+@length-1] is part of the mapped IOVA range.
343 static bool iova_bitmap_mapped_range(struct iova_bitmap_map
*mapped
,
344 unsigned long iova
, size_t length
)
346 return mapped
->npages
&&
347 (iova
>= mapped
->iova
&&
348 (iova
+ length
- 1) <= (mapped
->iova
+ mapped
->length
- 1));
352 * Advances to a selected range, releases the current pinned
353 * pages and pins the next set of bitmap pages.
354 * Returns 0 on success or otherwise errno.
356 static int iova_bitmap_advance_to(struct iova_bitmap
*bitmap
,
361 index
= iova_bitmap_offset_to_index(bitmap
, iova
- bitmap
->iova
);
362 if (index
>= bitmap
->mapped_total_index
)
364 bitmap
->mapped_base_index
= index
;
366 iova_bitmap_put(bitmap
);
368 /* Pin the next set of bitmap pages */
369 return iova_bitmap_get(bitmap
);
373 * iova_bitmap_for_each() - Iterates over the bitmap
374 * @bitmap: IOVA bitmap to iterate
375 * @opaque: Additional argument to pass to the callback
376 * @fn: Function that gets called for each IOVA range
378 * Helper function to iterate over bitmap data representing a portion of IOVA
379 * space. It hides the complexity of iterating bitmaps and translating the
380 * mapped bitmap user pages into IOVA ranges to process.
382 * Return: 0 on success, and an error on failure either upon
383 * iteration or when the callback returns an error.
385 int iova_bitmap_for_each(struct iova_bitmap
*bitmap
, void *opaque
,
388 return fn(bitmap
, bitmap
->iova
, bitmap
->length
, opaque
);
390 EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each
, "IOMMUFD");
393 * iova_bitmap_set() - Records an IOVA range in bitmap
394 * @bitmap: IOVA bitmap
395 * @iova: IOVA to start
396 * @length: IOVA range length
398 * Set the bits corresponding to the range [iova .. iova+length-1] in
402 void iova_bitmap_set(struct iova_bitmap
*bitmap
,
403 unsigned long iova
, size_t length
)
405 struct iova_bitmap_map
*mapped
= &bitmap
->mapped
;
406 unsigned long cur_bit
, last_bit
, last_page_idx
;
409 if (unlikely(!iova_bitmap_mapped_range(mapped
, iova
, length
))) {
412 * The attempt to advance the base index to @iova
413 * may fail if it's out of bounds, or pinning the pages
416 if (iova_bitmap_advance_to(bitmap
, iova
))
420 last_page_idx
= mapped
->npages
- 1;
421 cur_bit
= ((iova
- mapped
->iova
) >>
422 mapped
->pgshift
) + mapped
->pgoff
* BITS_PER_BYTE
;
423 last_bit
= (((iova
+ length
- 1) - mapped
->iova
) >>
424 mapped
->pgshift
) + mapped
->pgoff
* BITS_PER_BYTE
;
427 unsigned int page_idx
= cur_bit
/ BITS_PER_PAGE
;
428 unsigned int offset
= cur_bit
% BITS_PER_PAGE
;
429 unsigned int nbits
= min(BITS_PER_PAGE
- offset
,
430 last_bit
- cur_bit
+ 1);
433 if (unlikely(page_idx
> last_page_idx
)) {
435 ((last_bit
- cur_bit
+ 1) << mapped
->pgshift
);
437 iova
+= (length
- left
);
442 kaddr
= kmap_local_page(mapped
->pages
[page_idx
]);
443 bitmap_set(kaddr
, offset
, nbits
);
446 } while (cur_bit
<= last_bit
);
448 EXPORT_SYMBOL_NS_GPL(iova_bitmap_set
, "IOMMUFD");