// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IOMMU helpers in MMU context.
 *
 * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>

#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>
static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)
struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * in mm_iommu_get we temporarily use this to store
	 * struct page address.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};
bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * For a starting point for a maximum page size calculation
	 * we use @ua and @entries natural alignment to allow IOMMU pages
	 * smaller than huge pages but still bigger than PAGE_SIZE.
	 */
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	mmap_read_lock(mm);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	mmap_read_unlock(mm);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		/*
		 * Allow to use larger than 64k IOMMU pages. Only do that
		 * if we are backed by hugetlb. Skip device memory as it is not
		 * backed with page structs.
		 */
		pageshift = PAGE_SHIFT;
		for (i = 0; i < entries; ++i) {
			struct page *page = mem->hpages[i];

			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
				pageshift = page_shift(compound_head(page));
			mem->pageshift = min(mem->pageshift, pageshift);
			/*
			 * We don't need struct page reference any more, switch
			 * to physical address.
			 */
			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* free the references taken */
	unpin_user_pages(mem->hpages, pinned);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	account_locked_vm(mm, locked_entries, false);

	return ret;
}
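/*
 * mm_iommu_new() preregisters regular userspace memory, mm_iommu_newdev()
 * preregisters device memory starting at @dev_hpa. A rough usage sketch,
 * based only on the functions in this file (callers such as the VFIO SPAPR
 * TCE driver may differ in detail; program_tce() is a hypothetical consumer):
 *
 *	struct mm_iommu_table_group_mem_t *mem;
 *	unsigned long hpa;
 *
 *	if (!mm_iommu_new(current->mm, ua, entries, &mem)) {
 *		if (!mm_iommu_ua_to_hpa(mem, ua, PAGE_SHIFT, &hpa))
 *			program_tce(hpa);	// hypothetical consumer
 *		mm_iommu_put(current->mm, mem);
 *	}
 */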
long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);
long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);
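/*
 * Undo the pinning done in mm_iommu_do_alloc(): for every still valid entry,
 * transfer the DIRTY bit tracked in the low bits of hpas[] to the struct page
 * and drop the long-term pin. Device memory regions have no hpas[] array and
 * are skipped entirely.
 */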
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		unpin_user_page(page);

		mem->hpas[i] = 0;
	}
}
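/*
 * Final teardown: unpin the pages and free the region. mm_iommu_free() is the
 * RCU callback scheduled by mm_iommu_release() so that lockless readers (the
 * real mode lookup below) can finish before the memory goes away.
 */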
static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}
static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}
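/*
 * Drop a reference taken by mm_iommu_new()/mm_iommu_get(). The region is only
 * released once the last user is gone and no hardware mappings remain, i.e.
 * the @mapped counter can be flipped from 1 to 0.
 */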
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);
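/*
 * Look up a preregistered region which fully covers [ua, ua + size) in the
 * per-mm list; returns NULL if no such region has been registered.
 */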
struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
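/*
 * Real mode variant of mm_iommu_lookup(): it uses the lockless list iterator
 * as it may be called in contexts (real mode) where the usual locking and
 * RCU read-side primitives are not usable.
 */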
struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}
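/*
 * Find an exact match by userspace address and number of entries and take an
 * additional reference, to be dropped later with mm_iommu_put().
 */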
struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);
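/*
 * Translate a userspace address within a preregistered region to the host
 * physical address recorded at pinning time. @pageshift is the IOMMU page
 * shift the caller intends to use; the translation fails if it is larger
 * than what the region can back.
 */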
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
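/*
 * Real mode variant of mm_iommu_ua_to_hpa(): the hpas[] array is vmalloc'ed,
 * so the entry is translated to a physical address with vmalloc_to_phys()
 * before being dereferenced, as real mode cannot access vmalloc addresses
 * through the MMU.
 */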
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
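/*
 * Mark the page backing @ua dirty from real mode: the DIRTY bit is kept in
 * the low bits of the hpas[] entry and transferred to the struct page in
 * mm_iommu_unpin(). Device memory regions are skipped.
 */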
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
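/*
 * Check whether @hpa falls into a preregistered device memory region. On a
 * hit, *size is clamped to what is actually preregistered starting at @hpa,
 * which may be less than 1 << pageshift (see the comment below).
 */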
bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
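/*
 * The @mapped counter protects the region against mm_iommu_put() while the
 * hardware table still references it: mm_iommu_mapped_inc() fails once the
 * last mm_iommu_put() has dropped the counter to zero.
 */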
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}