/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
 */
#ifndef __IOMMUFD_PRIVATE_H
#define __IOMMUFD_PRIVATE_H

#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
#include <uapi/linux/iommufd.h>

#include "../iommu-priv.h"
struct iommufd_device;

struct iommufd_ctx {
	struct file *file;
	struct xarray objects;
	struct xarray groups;
	wait_queue_head_t destroy_wait;
	struct rw_semaphore ioas_creation_lock;

	u8 account_mode;
	/* Compatibility with VFIO no iommu */
	u8 no_iommu_mode;
	struct iommufd_ioas *vfio_ioas;
};
/*
 * The IOVA to PFN map. The map automatically copies the PFNs into multiple
 * domains and permits sharing of PFNs between io_pagetable instances. This
 * supports both a design where IOAS's are 1:1 with a domain (eg because the
 * domain is HW customized), and a design where the IOAS is 1:N with multiple
 * generic domains. The io_pagetable holds an interval tree of iopt_areas
 * which point to shared iopt_pages which hold the pfns mapped to the page
 * table.
 *
 * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
 */
struct io_pagetable {
	struct rw_semaphore domains_rwsem;
	struct xarray domains;
	struct xarray access_list;
	unsigned int next_domain_id;

	struct rw_semaphore iova_rwsem;
	struct rb_root_cached area_itree;
	/* IOVA that cannot become reserved, struct iopt_allowed */
	struct rb_root_cached allowed_itree;
	/* IOVA that cannot be allocated, struct iopt_reserved */
	struct rb_root_cached reserved_itree;
	u8 disable_large_pages;
	unsigned long iova_alignment;
};
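/*
 * Illustrative sketch, not part of the original header: callers that need a
 * consistent view of both the attached domains and the IOVA trees nest the
 * locks in the order documented above. The iopt local below is hypothetical.
 *
 *	down_read(&iopt->domains_rwsem);
 *	down_read(&iopt->iova_rwsem);
 *	... walk iopt->area_itree and the iopt->domains xarray ...
 *	up_read(&iopt->iova_rwsem);
 *	up_read(&iopt->domains_rwsem);
 */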
void iopt_init_table(struct io_pagetable *iopt);
void iopt_destroy_table(struct io_pagetable *iopt);
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
		   unsigned long length, struct list_head *pages_list);
void iopt_free_pages_list(struct list_head *pages_list);
enum {
	IOPT_ALLOC_IOVA = 1 << 0,
};
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, void __user *uptr,
			unsigned long length, int iommu_prot,
			unsigned int flags);
int iopt_map_file_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
			unsigned long *iova, struct file *file,
			unsigned long start, unsigned long length,
			int iommu_prot, unsigned int flags);
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
		   unsigned long length, unsigned long *dst_iova,
		   int iommu_prot, unsigned int flags);
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
		    unsigned long length, unsigned long *unmapped);
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);

int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
				   struct iommu_domain *domain,
				   unsigned long flags,
				   struct iommu_hwpt_get_dirty_bitmap *bitmap);
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
			    struct iommu_domain *domain, bool enable);

void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
				 unsigned long length);
int iopt_table_add_domain(struct io_pagetable *iopt,
			  struct iommu_domain *domain);
void iopt_table_remove_domain(struct io_pagetable *iopt,
			      struct iommu_domain *domain);
int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt,
					struct device *dev,
					phys_addr_t *sw_msi_start);
int iopt_set_allow_iova(struct io_pagetable *iopt,
			struct rb_root_cached *allowed_iova);
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
		      unsigned long last, void *owner);
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
		  size_t num_iovas);
void iopt_enable_large_pages(struct io_pagetable *iopt);
int iopt_disable_large_pages(struct io_pagetable *iopt);
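/*
 * Illustrative sketch, not part of the original header: the usual pairing of
 * the map/unmap helpers above. With IOPT_ALLOC_IOVA the allocator picks the
 * IOVA and returns it through the pointer. The ictx/iopt/uptr/length locals
 * are hypothetical.
 *
 *	unsigned long iova = 0, unmapped;
 *	int rc;
 *
 *	rc = iopt_map_user_pages(ictx, iopt, &iova, uptr, length,
 *				 IOMMU_READ | IOMMU_WRITE, IOPT_ALLOC_IOVA);
 *	if (rc)
 *		return rc;
 *	...
 *	rc = iopt_unmap_iova(iopt, iova, length, &unmapped);
 */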
struct iommufd_ucmd {
	struct iommufd_ctx *ictx;
	void __user *ubuffer;
	u32 user_size;
	void *cmd;
};

int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
		       unsigned long arg);

/* Copy the response in ucmd->cmd back to userspace. */
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
				       size_t cmd_len)
{
	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
			 min_t(size_t, ucmd->user_size, cmd_len)))
		return -EFAULT;
	return 0;
}
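/*
 * Illustrative sketch, not part of the original header: an ioctl handler
 * writes its outputs into the kernel copy at ucmd->cmd and then calls
 * iommufd_ucmd_respond() to copy the result back to userspace. The
 * iommu_fake_cmd struct and out_value field are hypothetical.
 *
 *	int iommufd_fake_ioctl(struct iommufd_ucmd *ucmd)
 *	{
 *		struct iommu_fake_cmd *cmd = ucmd->cmd;
 *
 *		cmd->out_value = 42;
 *		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 *	}
 */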
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
	if (!refcount_inc_not_zero(&obj->users))
		return false;
	if (!refcount_inc_not_zero(&obj->shortterm_users)) {
		/*
		 * If the caller doesn't already have a ref on obj this must be
		 * called under the xa_lock. Otherwise the caller is holding a
		 * ref on users. Thus it cannot be one before this decrement.
		 */
		refcount_dec(&obj->users);
		return false;
	}
	return true;
}
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type);
static inline void iommufd_put_object(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj)
{
	/*
	 * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
	 * a spurious !0 users with a 0 shortterm_users.
	 */
	refcount_dec(&obj->users);
	if (refcount_dec_and_test(&obj->shortterm_users))
		wake_up_interruptible_all(&ictx->destroy_wait);
}
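/*
 * Illustrative sketch, not part of the original header: every successful
 * iommufd_get_object() (or iommufd_lock_obj()) must be paired with
 * iommufd_put_object() once the caller is done with the object. The object
 * type used here is just an example.
 *
 *	struct iommufd_object *obj;
 *
 *	obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS);
 *	if (IS_ERR(obj))
 *		return PTR_ERR(obj);
 *	... use the object ...
 *	iommufd_put_object(ictx, obj);
 */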
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
				      struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
			     struct iommufd_object *obj);

enum {
	REMOVE_WAIT_SHORTTERM = 1,
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
			  struct iommufd_object *to_destroy, u32 id,
			  unsigned int flags);
/*
 * The caller holds a users refcount and wants to destroy the object. At this
 * point the caller has no shortterm_users reference and at least the xarray
 * will be holding one.
 */
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
					       struct iommufd_object *obj)
{
	int ret;

	ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);

	/*
	 * If there is a bug and we couldn't destroy the object then we did put
	 * back the caller's users refcount and will eventually try to free it
	 * again during close.
	 */
	WARN_ON(ret);
}
/*
 * The HWPT allocated by autodomains is used in possibly many devices and
 * is automatically destroyed when its refcount reaches zero.
 *
 * If userspace uses the HWPT manually, even for a short term, then it will
 * disrupt this refcounting and the auto-free in the kernel will not work.
 * Userspace that tries to use the automatically allocated HWPT must be careful
 * to ensure that it is consistently destroyed, eg by not racing accesses
 * and by not attaching an automatic HWPT to a device manually.
 */
static inline void
iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
				   struct iommufd_object *obj)
{
	iommufd_object_remove(ictx, obj, obj->id, 0);
}
#define __iommufd_object_alloc(ictx, ptr, type, obj)                           \
	container_of(_iommufd_object_alloc(                                    \
			     ictx,                                             \
			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
						      offsetof(typeof(*(ptr)), \
							       obj) != 0),     \
			     type),                                            \
		     typeof(*(ptr)), obj)

#define iommufd_object_alloc(ictx, ptr, type) \
	__iommufd_object_alloc(ictx, ptr, type, obj)
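/*
 * Illustrative sketch, not part of the original header: a creation path
 * allocates the object (the macro checks that the embedded iommufd_object is
 * the first member), fills it in, and then either publishes it with
 * iommufd_object_finalize() or unwinds with iommufd_object_abort(). The
 * iommufd_fake type and IOMMUFD_OBJ_FAKE constant are hypothetical.
 *
 *	struct iommufd_fake *fake;
 *
 *	fake = iommufd_object_alloc(ucmd->ictx, fake, IOMMUFD_OBJ_FAKE);
 *	if (IS_ERR(fake))
 *		return PTR_ERR(fake);
 *	... initialize fake ...
 *	iommufd_object_finalize(ucmd->ictx, &fake->obj);
 *	return 0;
 */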
/*
 * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
 * io_pagetable object. It is a user-controlled mapping of IOVA -> PFNs. The
 * mapping is copied into all of the associated domains and made available to
 * in-kernel users.
 *
 * Every iommu_domain that is created is wrapped in an iommufd_hw_pagetable
 * object. When we go to attach a device to an IOAS we need to get an
 * iommu_domain and wrapping iommufd_hw_pagetable for it.
 *
 * An iommu_domain & iommufd_hw_pagetable will be automatically selected
 * for a device based on the hwpt_list. If no suitable iommu_domain
 * is found a new iommu_domain will be created.
 */
struct iommufd_ioas {
	struct iommufd_object obj;
	struct io_pagetable iopt;
	struct mutex mutex;
	struct list_head hwpt_list;
};
static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
						    u32 id)
{
	return container_of(iommufd_get_object(ictx, id,
					       IOMMUFD_OBJ_IOAS),
			    struct iommufd_ioas, obj);
}

struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_ioas_destroy(struct iommufd_object *obj);
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
int iommufd_ioas_map_file(struct iommufd_ucmd *ucmd);
int iommufd_ioas_change_process(struct iommufd_ucmd *ucmd);
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
			       struct iommufd_ctx *ictx);

int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
int iommufd_check_iova_range(struct io_pagetable *iopt,
			     struct iommu_hwpt_get_dirty_bitmap *bitmap);
/*
 * A HW pagetable is called an iommu_domain inside the kernel. This user object
 * allows directly creating and inspecting the domains. Domains that have
 * kernel-owned page tables will be associated with an iommufd_ioas that
 * provides the IOVA to PFN map.
 */
struct iommufd_hw_pagetable {
	struct iommufd_object obj;
	struct iommu_domain *domain;
	struct iommufd_fault *fault;
};

struct iommufd_hwpt_paging {
	struct iommufd_hw_pagetable common;
	struct iommufd_ioas *ioas;
	bool auto_domain : 1;
	bool enforce_cache_coherency : 1;
	bool msi_cookie : 1;
	bool nest_parent : 1;
	/* Head at iommufd_ioas::hwpt_list */
	struct list_head hwpt_item;
};

struct iommufd_hwpt_nested {
	struct iommufd_hw_pagetable common;
	struct iommufd_hwpt_paging *parent;
	struct iommufd_viommu *viommu;
};

static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
{
	return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING;
}
static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_paging, common);
}

static inline struct iommufd_hwpt_nested *
to_hwpt_nested(struct iommufd_hw_pagetable *hwpt)
{
	return container_of(hwpt, struct iommufd_hwpt_nested, common);
}

static inline struct iommufd_hwpt_paging *
find_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
	switch (hwpt->obj.type) {
	case IOMMUFD_OBJ_HWPT_PAGING:
		return to_hwpt_paging(hwpt);
	case IOMMUFD_OBJ_HWPT_NESTED:
		return to_hwpt_nested(hwpt)->parent;
	default:
		return NULL;
	}
}
static inline struct iommufd_hwpt_paging *
iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_PAGING),
			    struct iommufd_hwpt_paging, common.obj);
}

static inline struct iommufd_hw_pagetable *
iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_HWPT_NESTED),
			    struct iommufd_hw_pagetable, obj);
}

int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
			  struct iommufd_device *idev, u32 flags,
			  bool immediate_attach,
			  const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
				struct iommufd_device *idev);
struct iommufd_hw_pagetable *
iommufd_hw_pagetable_detach(struct iommufd_device *idev);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
void iommufd_hwpt_nested_abort(struct iommufd_object *obj);
int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd);
int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd);
static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
					    struct iommufd_hw_pagetable *hwpt)
{
	if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) {
		struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);

		lockdep_assert_not_held(&hwpt_paging->ioas->mutex);

		if (hwpt_paging->auto_domain) {
			iommufd_object_put_and_try_destroy(ictx, &hwpt->obj);
			return;
		}
	}
	refcount_dec(&hwpt->obj.users);
}
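/*
 * Illustrative sketch, not part of the original header: a detach path drops
 * its reference through iommufd_hw_pagetable_put() so that an auto_domain
 * HWPT is destroyed once its last user goes away. The idev local is
 * hypothetical.
 *
 *	struct iommufd_hw_pagetable *hwpt;
 *
 *	hwpt = iommufd_hw_pagetable_detach(idev);
 *	iommufd_hw_pagetable_put(idev->ictx, hwpt);
 */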
struct iommufd_group {
	struct kref ref;
	struct mutex lock;
	struct iommufd_ctx *ictx;
	struct iommu_group *group;
	struct iommufd_hw_pagetable *hwpt;
	struct list_head device_list;
	phys_addr_t sw_msi_start;
};
/*
 * An iommufd_device object represents the binding relationship between a
 * consuming driver and the iommufd. These objects are created/destroyed by
 * external drivers, not by userspace.
 */
struct iommufd_device {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_group *igroup;
	struct list_head group_item;
	/* always the physical device */
	struct device *dev;
	bool enforce_cache_coherency;
	/* protect iopf_enabled counter */
	struct mutex iopf_lock;
	unsigned int iopf_enabled;
};
static inline struct iommufd_device *
iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_DEVICE),
			    struct iommufd_device, obj);
}

void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
struct iommufd_access {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_ioas *ioas;
	struct iommufd_ioas *ioas_unpin;
	struct mutex ioas_lock;
	const struct iommufd_access_ops *ops;
	void *data;
	unsigned long iova_alignment;
	u32 iopt_access_list_id;
};

int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
			struct iommufd_access *access,
			u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to userspace. These objects are created/destroyed by userspace and
 * associated with hardware page table objects during page-table allocation.
 */
struct iommufd_fault {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct file *filep;

	/* The lists of outstanding faults, protected by the mutex below */
	struct mutex mutex;
	struct list_head deliver;
	struct xarray response;

	struct wait_queue_head wait_queue;
};

struct iommufd_attach_handle {
	struct iommu_attach_handle handle;
	struct iommufd_device *idev;
};

/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_FAULT),
			    struct iommufd_fault, obj);
}

int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
				    struct iommufd_device *idev);
void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_device *idev);
int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
				     struct iommufd_hw_pagetable *hwpt,
				     struct iommufd_hw_pagetable *old);

static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
					     struct iommufd_device *idev)
{
	if (hwpt->fault)
		return iommufd_fault_domain_attach_dev(hwpt, idev);

	return iommu_attach_group(hwpt->domain, idev->igroup->group);
}
static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_device *idev)
{
	if (hwpt->fault) {
		iommufd_fault_domain_detach_dev(hwpt, idev);
		return;
	}

	iommu_detach_group(hwpt->domain, idev->igroup->group);
}
static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
					      struct iommufd_hw_pagetable *hwpt,
					      struct iommufd_hw_pagetable *old)
{
	if (old->fault || hwpt->fault)
		return iommufd_fault_domain_replace_dev(idev, hwpt, old);

	return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}
static inline struct iommufd_viommu *
iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
{
	return container_of(iommufd_get_object(ucmd->ictx, id,
					       IOMMUFD_OBJ_VIOMMU),
			    struct iommufd_viommu, obj);
}

int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);
struct iommufd_vdevice {
	struct iommufd_object obj;
	struct iommufd_ctx *ictx;
	struct iommufd_viommu *viommu;
	struct device *dev;
	u64 id; /* per-vIOMMU virtual ID */
};
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
extern size_t iommufd_test_memory_limit;
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags);
bool iommufd_should_fail(void);
int __init iommufd_test_init(void);
void iommufd_test_exit(void);
bool iommufd_selftest_is_mock_dev(struct device *dev);
#else
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
						 unsigned int ioas_id,
						 u64 *iova, u32 *flags)
{
}
static inline bool iommufd_should_fail(void)
{
	return false;
}
static inline int __init iommufd_test_init(void)
{
	return 0;
}
static inline void iommufd_test_exit(void)
{
}
static inline bool iommufd_selftest_is_mock_dev(struct device *dev)
{
	return false;
}
#endif

#endif /* __IOMMUFD_PRIVATE_H */