/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
4 #ifndef _UAPI_IOMMUFD_H
5 #define _UAPI_IOMMUFD_H
7 #include <linux/ioctl.h>
8 #include <linux/types.h>
/* ioctl "type" character passed as the first argument of _IO() by every IOMMU_* ioctl macro. */
#define IOMMUFD_TYPE (';')
13 * DOC: General ioctl format
15 * The ioctl interface follows a general format to allow for extensibility. Each
16 * ioctl is passed in a structure pointer as the argument providing the size of
17 * the structure in the first u32. The kernel checks that any structure space
18 * beyond what it understands is 0. This allows userspace to use the backward
19 * compatible portion while consistently using the newer, larger, structures.
21 * ioctls use a standard meaning for common errnos:
23 * - ENOTTY: The IOCTL number itself is not supported at all
24 * - E2BIG: The IOCTL number is supported, but the provided structure has
25 * non-zero in a part the kernel does not understand.
26 * - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27 * understood, however a known field has a value the kernel does not
28 * understand or support.
29 * - EINVAL: Everything about the IOCTL was understood, but a field is not
30 * correct.
31 * - ENOENT: An ID or IOVA provided does not exist.
32 * - ENOMEM: Out of memory.
33 * - EOVERFLOW: Mathematics overflowed.
35 * As well as additional errnos, within specific ioctls.
/*
 * Command numbers passed as the second argument of _IO() by the IOMMU_* ioctl
 * macros. IOMMUFD_CMD_DESTROY shares the value of IOMMUFD_CMD_BASE; every
 * other command has an explicit value so the numbering stays fixed.
 */
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
};
61 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
62 * @size: sizeof(struct iommu_destroy)
63 * @id: iommufd object ID to destroy. Can be any destroyable object type.
65 * Destroy any object held within iommufd.
67 struct iommu_destroy
{
/* ioctl request number; the argument is a pointer to struct iommu_destroy. */
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
74 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
75 * @size: sizeof(struct iommu_ioas_alloc)
77 * @out_ioas_id: Output IOAS ID for the allocated object
79 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
82 struct iommu_ioas_alloc
{
/* ioctl request number; the argument is a pointer to struct iommu_ioas_alloc. */
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
90 * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
92 * @last: Inclusive last IOVA
94 * An interval in IOVA space.
96 struct iommu_iova_range
{
102 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
103 * @size: sizeof(struct iommu_ioas_iova_ranges)
104 * @ioas_id: IOAS ID to read ranges from
105 * @num_iovas: Input/Output total number of ranges in the IOAS
106 * @__reserved: Must be 0
107 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
108 * @out_iova_alignment: Minimum alignment required for mapping IOVA
110 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
111 * is not allowed. num_iovas will be set to the total number of iovas and
112 * the allowed_iovas[] will be filled in as space permits.
114 * The allowed ranges are dependent on the HW path the DMA operation takes, and
115 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
116 * full range, and each attached device will narrow the ranges based on that
117 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
118 * should query ranges after every attach/detach to know what IOVAs are valid
121 * On input num_iovas is the length of the allowed_iovas array. On output it is
122 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
123 * num_iovas to the required value if num_iovas is too small. In this case the
124 * caller should allocate a larger output array and re-issue the ioctl.
126 * out_iova_alignment returns the minimum IOVA alignment that can be given
127 * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
129 * starting_iova % out_iova_alignment == 0
130 * (starting_iova + length) % out_iova_alignment == 0
132 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
133 * be higher than the system PAGE_SIZE.
135 struct iommu_ioas_iova_ranges
{
140 __aligned_u64 allowed_iovas
;
141 __aligned_u64 out_iova_alignment
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_iova_ranges. */
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
146 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
147 * @size: sizeof(struct iommu_ioas_allow_iovas)
148 * @ioas_id: IOAS ID to allow IOVAs from
149 * @num_iovas: Input/Output total number of ranges in the IOAS
150 * @__reserved: Must be 0
151 * @allowed_iovas: Pointer to array of struct iommu_iova_range
153 * Ensure a range of IOVAs are always available for allocation. If this call
154 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
155 * that are narrower than the ranges provided here. This call will fail if
156 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
158 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
159 * devices are attached the IOVA will narrow based on the device restrictions.
160 * When an allowed range is specified any narrowing will be refused, ie device
161 * attachment can fail if the device requires limiting within the allowed range.
163 * Automatic IOVA allocation is also impacted by this call. MAP will only
164 * allocate within the allowed IOVAs if they are present.
166 * This call replaces the entire allowed list with the given list.
168 struct iommu_ioas_allow_iovas
{
173 __aligned_u64 allowed_iovas
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_allow_iovas. */
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};
191 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
192 * @size: sizeof(struct iommu_ioas_map)
193 * @flags: Combination of enum iommufd_ioas_map_flags
194 * @ioas_id: IOAS ID to change the mapping of
195 * @__reserved: Must be 0
196 * @user_va: Userspace pointer to start mapping from
197 * @length: Number of bytes to map
198 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
199 * then this must be provided as input.
201 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
202 * mapping will be established at iova, otherwise a suitable location based on
203 * the reserved and allowed lists will be automatically selected and returned in
206 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
207 * be unused, existing IOVA cannot be replaced.
209 struct iommu_ioas_map
{
214 __aligned_u64 user_va
;
215 __aligned_u64 length
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_map. */
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
221 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
222 * @size: sizeof(struct iommu_ioas_map_file)
223 * @flags: same as for iommu_ioas_map
224 * @ioas_id: same as for iommu_ioas_map
225 * @fd: the memfd to map
226 * @start: byte offset from start of file to map from
227 * @length: same as for iommu_ioas_map
228 * @iova: same as for iommu_ioas_map
230 * Set an IOVA mapping from a memfd file. All other arguments and semantics
231 * match those of IOMMU_IOAS_MAP.
233 struct iommu_ioas_map_file
{
239 __aligned_u64 length
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_map_file. */
#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
245 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
246 * @size: sizeof(struct iommu_ioas_copy)
247 * @flags: Combination of enum iommufd_ioas_map_flags
248 * @dst_ioas_id: IOAS ID to change the mapping of
249 * @src_ioas_id: IOAS ID to copy from
250 * @length: Number of bytes to copy and map
251 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
252 * set then this must be provided as input.
253 * @src_iova: IOVA to start the copy
255 * Copy an already existing mapping from src_ioas_id and establish it in
256 * dst_ioas_id. The src iova/length must exactly match a range used with
259 * This may be used to efficiently clone a subset of an IOAS to another, or as a
260 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
261 * establishing equivalent new mappings, as internal resources are shared, and
262 * the kernel will pin the user memory only once.
264 struct iommu_ioas_copy
{
269 __aligned_u64 length
;
270 __aligned_u64 dst_iova
;
271 __aligned_u64 src_iova
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_copy. */
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
276 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
277 * @size: sizeof(struct iommu_ioas_unmap)
278 * @ioas_id: IOAS ID to change the mapping of
279 * @iova: IOVA to start the unmapping at
280 * @length: Number of bytes to unmap, and return back the bytes unmapped
282 * Unmap an IOVA range. The iova/length must be a superset of a previously
283 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
284 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
287 struct iommu_ioas_unmap
{
291 __aligned_u64 length
;
/* ioctl request number; the argument is a pointer to struct iommu_ioas_unmap. */
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
/**
 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
 *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
 *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
 *    based accounting. Global option, object_id must be 0
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};
/**
 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
 *                           ioctl(IOMMU_OPTION_OP_GET)
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};
325 * struct iommu_option - iommu option multiplexer
326 * @size: sizeof(struct iommu_option)
327 * @option_id: One of enum iommufd_option
328 * @op: One of enum iommufd_option_ops
329 * @__reserved: Must be 0
330 * @object_id: ID of the object if required
331 * @val64: Option value to set or value returned on get
333 * Change a simple option value. This multiplexer allows controlling options
334 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
335 * will return the current value.
337 struct iommu_option
{
/* ioctl request number; the argument is a pointer to struct iommu_option. */
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};
360 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
361 * @size: sizeof(struct iommu_vfio_ioas)
362 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
363 * For IOMMU_VFIO_IOAS_GET will output the IOAS ID
364 * @op: One of enum iommufd_vfio_ioas_op
365 * @__reserved: Must be 0
367 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
368 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
369 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
370 * compatibility ioas, either by taking what is already set, or auto creating
371 * one. From then on VFIO will continue to use that ioas and is not affected by
372 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
374 struct iommu_vfio_ioas
{
/* ioctl request number; the argument is a pointer to struct iommu_vfio_ioas. */
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
 *                          domain can be attached to any PASID on the device.
 *                          Any domain attached to the non-PASID part of the
 *                          device must also be flagged, otherwise attaching a
 *                          PASID will be blocked.
 *                          If IOMMU does not support PASID it will return
 *                          error (-EOPNOTSUPP).
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
};
/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table flags
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};
419 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
420 * info (IOMMU_HWPT_DATA_VTD_S1)
421 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
422 * @pgtbl_addr: The base address of the stage-1 page table.
423 * @addr_width: The address width of the stage-1 page table
424 * @__reserved: Must be 0
426 struct iommu_hwpt_vtd_s1
{
428 __aligned_u64 pgtbl_addr
;
434 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
435 * (IOMMU_HWPT_DATA_ARM_SMMUV3)
437 * @ste: The first two double words of the user space Stream Table Entry for
438 * the translation. Must be little-endian.
439 * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
440 * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
441 * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
443 * -EIO will be returned if @ste is not legal or contains any non-allowed field.
444 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
445 * nested domain will translate the same as the nesting parent. The S1 will
446 * install a Context Descriptor Table pointing at userspace memory translated
447 * by the nesting parent.
449 struct iommu_hwpt_arm_smmuv3
{
450 __aligned_le64 ste
[2];
/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
};
466 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
467 * @size: sizeof(struct iommu_hwpt_alloc)
468 * @flags: Combination of enum iommufd_hwpt_alloc_flags
469 * @dev_id: The device to allocate this HWPT for
470 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
471 * @out_hwpt_id: The ID of the new HWPT
472 * @__reserved: Must be 0
473 * @data_type: One of enum iommu_hwpt_data_type
474 * @data_len: Length of the type specific data
475 * @data_uptr: User pointer to the type specific data
476 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
477 * IOMMU_HWPT_FAULT_ID_VALID is set in the @flags field.
478 * @__reserved2: Padding to 64-bit alignment. Must be 0.
480 * Explicitly allocate a hardware page table object. This is the same object
481 * type that is returned by iommufd_device_attach() and represents the
482 * underlying iommu driver's iommu_domain kernel object.
484 * A kernel-managed HWPT will be created with the mappings from the given
485 * IOAS via the @pt_id. The @data_type for this allocation must be set to
486 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
487 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
489 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
490 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
491 * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
492 * case, the @data_type must be set to a pre-defined type corresponding to an
493 * I/O page table type supported by the underlying IOMMU hardware. The device
494 * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
497 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
498 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
501 struct iommu_hwpt_alloc
{
510 __aligned_u64 data_uptr
;
/* ioctl request number; the argument is a pointer to struct iommu_hwpt_alloc. */
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};
527 * struct iommu_hw_info_vtd - Intel VT-d hardware information
529 * @flags: Combination of enum iommu_hw_info_vtd_flags
530 * @__reserved: Must be 0
532 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
533 * section 11.4.2 Capability Register.
534 * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d spec
535 * section 11.4.3 Extended Capability Register.
537 * User needs to understand the Intel VT-d specification to decode the
540 struct iommu_hw_info_vtd
{
543 __aligned_u64 cap_reg
;
544 __aligned_u64 ecap_reg
;
548 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
549 * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
551 * @flags: Must be set to 0
552 * @__reserved: Must be 0
553 * @idr: Implemented features for ARM SMMU Non-secure programming interface
554 * @iidr: Information about the implementation and implementer of ARM SMMU,
555 * and architecture version supported
556 * @aidr: ARM SMMU architecture version
558 * For the details of @idr, @iidr and @aidr, please refer to the chapters
559 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
561 * This reports the raw HW capability, and not all bits are meaningful to be
562 * read by userspace. Only the following fields should be used:
564 * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN , CD2L, ASID16, TTF
565 * idr[1]: SIDSIZE, SSIDSIZE
567 * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
569 * - S1P should be assumed to be true if a NESTED HWPT can be created
570 * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
572 * - ATS is a per-device property. If the VMM describes any devices as ATS
573 * capable in ACPI/DT it should set the corresponding idr.
575 * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is
576 * important that VMMs do not read bits outside the list to allow for
577 * compatibility with future kernels. Several features in the SMMUv3
578 * architecture are not currently supported by the kernel for nesting: HTTU,
579 * BTM, MPAM and others.
581 struct iommu_hw_info_arm_smmuv3
{
/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
};
/**
 * enum iommufd_hw_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 *
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
};
617 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
618 * @size: sizeof(struct iommu_hw_info)
620 * @dev_id: The device bound to the iommufd
621 * @data_len: Input the length of a user buffer in bytes. Output the length of
622 * data that kernel supports
623 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
624 * the iommu type specific hardware information data
625 * @out_data_type: Output the iommu hardware info type as defined in the enum
626 * iommu_hw_info_type.
627 * @out_capabilities: Output the generic iommu capability info type as defined
628 * in the enum iommufd_hw_capabilities.
629 * @__reserved: Must be 0
631 * Query an iommu type specific hardware information data from an iommu behind
632 * a given device that has been bound to iommufd. This hardware info data will
633 * be used to sync capabilities between the virtual iommu and the physical
634 * iommu, e.g. a nested translation setup needs to check the hardware info, so
635 * a guest stage-1 page table can be compatible with the physical iommu.
637 * To capture an iommu type specific hardware information data, @data_uptr and
638 * its length @data_len must be provided. Trailing bytes will be zeroed if the
639 * user buffer is larger than the data that kernel has. Otherwise, kernel only
640 * fills the buffer using the given length in @data_len. If the ioctl succeeds,
641 * @data_len will be updated to the length that kernel actually supports,
642 * @out_data_type will be filled to decode the data filled in the buffer
643 * pointed by @data_uptr. Input @data_len == zero is allowed.
645 struct iommu_hw_info
{
650 __aligned_u64 data_uptr
;
653 __aligned_u64 out_capabilities
;
/* ioctl request number; the argument is a pointer to struct iommu_hw_info. */
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};
667 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
668 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
669 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
670 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
671 * @__reserved: Must be 0
673 * Toggle dirty tracking on an HW pagetable.
675 struct iommu_hwpt_set_dirty_tracking
{
/* ioctl request number; the argument is a pointer to struct iommu_hwpt_set_dirty_tracking. */
#define IOMMU_HWPT_SET_DIRTY_TRACKING \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed in the expectation
 *                                        where the next operation is an unmap
 *                                        of the same IOVA range.
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};
698 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
699 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
700 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
701 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
702 * @__reserved: Must be 0
703 * @iova: base IOVA of the bitmap first bit
704 * @length: IOVA range size
705 * @page_size: page size granularity of each bit in the bitmap
706 * @data: bitmap where to set the dirty bits. The bitmap bits each
707 * represent a page_size which you deviate from an arbitrary iova.
709 * Checking a given IOVA is dirty:
711 * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
713 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
714 * with the dirty IOVAs. In doing so it will also by default clear any
715 * dirty bit metadata set in the IOPTE.
717 struct iommu_hwpt_get_dirty_bitmap
{
723 __aligned_u64 length
;
724 __aligned_u64 page_size
;
/* ioctl request number; the argument is a pointer to struct iommu_hwpt_get_dirty_bitmap. */
#define IOMMU_HWPT_GET_DIRTY_BITMAP \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
};
/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
 *                            to all-levels page structure cache or just
 *                            the leaf PTE cache.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};
753 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
754 * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
755 * @addr: The start address of the range to be invalidated. It needs to
757 * @npages: Number of contiguous 4K pages to be invalidated.
758 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
759 * @__reserved: Must be 0
761 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
762 * invalidation in nested translation. Userspace uses this structure to
763 * tell the impacted cache scope after modifying the stage-1 page table.
765 * All the caches related to the page table can be invalidated by setting
766 * @addr to be 0 and @npages to be U64_MAX.
768 * The device TLB will be invalidated automatically if ATS is enabled.
770 struct iommu_hwpt_vtd_s1_invalidate
{
772 __aligned_u64 npages
;
778 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
779 * (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
780 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
781 * Must be little-endian.
783 * Supported command list only when passing in a vIOMMU via @hwpt_id:
784 * CMDQ_OP_TLBI_NSNH_ALL
786 * CMDQ_OP_TLBI_NH_VAA
787 * CMDQ_OP_TLBI_NH_ALL
788 * CMDQ_OP_TLBI_NH_ASID
791 * CMDQ_OP_CFGI_CD_ALL
793 * -EIO will be returned if the command is not supported.
795 struct iommu_viommu_arm_smmuv3_invalidate
{
796 __aligned_le64 cmd
[2];
800 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
801 * @size: sizeof(struct iommu_hwpt_invalidate)
802 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
803 * @data_uptr: User pointer to an array of driver-specific cache invalidation
805 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
806 * type of all the entries in the invalidation request array. It
807 * should be a type supported by the hwpt pointed by @hwpt_id.
808 * @entry_len: Length (in bytes) of a request entry in the request array
809 * @entry_num: Input the number of cache invalidation requests in the array.
810 * Output the number of requests successfully handled by kernel.
811 * @__reserved: Must be 0.
813 * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
814 * on a user-managed page table should be followed by this operation, if a HWPT
815 * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
816 * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
818 * Each ioctl can support one or more cache invalidation requests in the array
819 * that has a total size of @entry_len * @entry_num.
821 * An empty invalidation request array by setting @entry_num==0 is allowed, and
822 * @entry_len and @data_uptr would be ignored in this case. This can be used to
823 * check if the given @data_type is supported or not by kernel.
825 struct iommu_hwpt_invalidate
{
828 __aligned_u64 data_uptr
;
/* ioctl request number; the argument is a pointer to struct iommu_hwpt_invalidate. */
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};
/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};
865 * struct iommu_hwpt_pgfault - iommu page fault data
866 * @flags: Combination of enum iommu_hwpt_pgfault_flags
867 * @dev_id: id of the originated device
868 * @pasid: Process Address Space ID
869 * @grpid: Page Request Group Index
870 * @perm: Combination of enum iommu_hwpt_pgfault_perm
871 * @addr: Fault address
872 * @length: a hint of how much data the requestor is expecting to fetch. For
873 * example, if the PRI initiator knows it is going to do a 10MB
874 * transfer, it could fill in 10MB and the OS could pre-fault in
875 * 10MB of IOVA. It defaults to 0 if there's no such hint.
876 * @cookie: kernel-managed cookie identifying a group of fault messages. The
877 * cookie number encoded in the last page fault of the group should
878 * be echoed back in the response message.
880 struct iommu_hwpt_pgfault
{
/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};
906 * struct iommu_hwpt_page_response - IOMMU page fault response
907 * @cookie: The kernel-managed cookie reported in the fault message.
908 * @code: One of response code in enum iommufd_page_response_code.
910 struct iommu_hwpt_page_response
{
916 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
917 * @size: sizeof(struct iommu_fault_alloc)
919 * @out_fault_id: The ID of the new FAULT
920 * @out_fault_fd: The fd of the new FAULT
922 * Explicitly allocate a fault handling object.
924 struct iommu_fault_alloc
{
/* ioctl request number; the argument is a pointer to struct iommu_fault_alloc. */
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
/**
 * enum iommu_viommu_type - Virtual IOMMU Type
 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
 */
enum iommu_viommu_type {
	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
};
943 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
944 * @size: sizeof(struct iommu_viommu_alloc)
946 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
947 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
948 * @hwpt_id: ID of a nesting parent HWPT to associate to
949 * @out_viommu_id: Output virtual IOMMU ID for the allocated object
951 * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
952 * virtualization support that is a security-isolated slice of the real IOMMU HW
953 * that is unique to a specific VM. Operations global to the IOMMU are connected
954 * to the vIOMMU, such as:
955 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
956 * - Non-device-affiliated event reporting, e.g. invalidation queue errors
957 * - Access to a sharable nesting parent pagetable across physical IOMMUs
958 * - Virtualization of various platform IDs, e.g. RIDs and others
959 * - Delivery of paravirtualized invalidation
960 * - Direct assigned invalidation queues
961 * - Direct assigned interrupts
963 struct iommu_viommu_alloc
{
/* ioctl request number; the argument is a pointer to struct iommu_viommu_alloc. */
#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
974 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
975 * @size: sizeof(struct iommu_vdevice_alloc)
976 * @viommu_id: vIOMMU ID to associate with the virtual device
977 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
978 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
979 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
980 * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
982 * Allocate a virtual device instance (for a physical device) against a vIOMMU.
983 * This instance holds the device's information (related to its vIOMMU) in a VM.
985 struct iommu_vdevice_alloc
{
989 __u32 out_vdevice_id
;
990 __aligned_u64 virt_id
;
/* ioctl request number; the argument is a pointer to struct iommu_vdevice_alloc. */
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
995 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
996 * @size: sizeof(struct iommu_ioas_change_process)
997 * @__reserved: Must be 0
999 * This transfers pinned memory counts for every memory map in every IOAS
1000 * in the context to the current process. This only supports maps created
1001 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
1002 * If the ioctl returns a failure status, then nothing is changed.
1004 * This API is useful for transferring operation of a device from one process
1005 * to another, such as during userland live update.
1007 struct iommu_ioas_change_process
{
/* ioctl request number; the argument is a pointer to struct iommu_ioas_change_process. */
#define IOMMU_IOAS_CHANGE_PROCESS \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)