1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
6 #include <uapi/linux/iommufd.h>
8 #include "arm-smmu-v3.h"
10 void *arm_smmu_hw_info(struct device
*dev
, u32
*length
, u32
*type
)
12 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
13 struct iommu_hw_info_arm_smmuv3
*info
;
14 u32 __iomem
*base_idr
;
17 info
= kzalloc(sizeof(*info
), GFP_KERNEL
);
19 return ERR_PTR(-ENOMEM
);
21 base_idr
= master
->smmu
->base
+ ARM_SMMU_IDR0
;
22 for (i
= 0; i
<= 5; i
++)
23 info
->idr
[i
] = readl_relaxed(base_idr
+ i
);
24 info
->iidr
= readl_relaxed(master
->smmu
->base
+ ARM_SMMU_IIDR
);
25 info
->aidr
= readl_relaxed(master
->smmu
->base
+ ARM_SMMU_AIDR
);
27 *length
= sizeof(*info
);
28 *type
= IOMMU_HW_INFO_TYPE_ARM_SMMUV3
;
33 static void arm_smmu_make_nested_cd_table_ste(
34 struct arm_smmu_ste
*target
, struct arm_smmu_master
*master
,
35 struct arm_smmu_nested_domain
*nested_domain
, bool ats_enabled
)
37 arm_smmu_make_s2_domain_ste(
38 target
, master
, nested_domain
->vsmmu
->s2_parent
, ats_enabled
);
40 target
->data
[0] = cpu_to_le64(STRTAB_STE_0_V
|
41 FIELD_PREP(STRTAB_STE_0_CFG
,
42 STRTAB_STE_0_CFG_NESTED
));
43 target
->data
[0] |= nested_domain
->ste
[0] &
44 ~cpu_to_le64(STRTAB_STE_0_CFG
);
45 target
->data
[1] |= nested_domain
->ste
[1];
49 * Create a physical STE from the virtual STE that userspace provided when it
50 * created the nested domain. Using the vSTE userspace can request:
53 * - Bypass STE (install the S2, no CD table)
54 * - CD table STE (install the S2 and the userspace CD table)
56 static void arm_smmu_make_nested_domain_ste(
57 struct arm_smmu_ste
*target
, struct arm_smmu_master
*master
,
58 struct arm_smmu_nested_domain
*nested_domain
, bool ats_enabled
)
61 FIELD_GET(STRTAB_STE_0_CFG
, le64_to_cpu(nested_domain
->ste
[0]));
64 * Userspace can request a non-valid STE through the nesting interface.
65 * We relay that into an abort physical STE with the intention that
66 * C_BAD_STE for this SID can be generated to userspace.
68 if (!(nested_domain
->ste
[0] & cpu_to_le64(STRTAB_STE_0_V
)))
69 cfg
= STRTAB_STE_0_CFG_ABORT
;
72 case STRTAB_STE_0_CFG_S1_TRANS
:
73 arm_smmu_make_nested_cd_table_ste(target
, master
, nested_domain
,
76 case STRTAB_STE_0_CFG_BYPASS
:
77 arm_smmu_make_s2_domain_ste(target
, master
,
78 nested_domain
->vsmmu
->s2_parent
,
81 case STRTAB_STE_0_CFG_ABORT
:
83 arm_smmu_make_abort_ste(target
);
88 static int arm_smmu_attach_dev_nested(struct iommu_domain
*domain
,
91 struct arm_smmu_nested_domain
*nested_domain
=
92 to_smmu_nested_domain(domain
);
93 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
94 struct arm_smmu_attach_state state
= {
96 .old_domain
= iommu_get_domain_for_dev(dev
),
97 .ssid
= IOMMU_NO_PASID
,
99 struct arm_smmu_ste ste
;
102 if (nested_domain
->vsmmu
->smmu
!= master
->smmu
)
104 if (arm_smmu_ssids_in_use(&master
->cd_table
))
107 mutex_lock(&arm_smmu_asid_lock
);
109 * The VM has to control the actual ATS state at the PCI device because
110 * we forward the invalidations directly from the VM. If the VM doesn't
111 * think ATS is on it will not generate ATC flushes and the ATC will
112 * become incoherent. Since we can't access the actual virtual PCI ATS
113 * config bit here base this off the EATS value in the STE. If the EATS
114 * is set then the VM must generate ATC flushes.
116 state
.disable_ats
= !nested_domain
->enable_ats
;
117 ret
= arm_smmu_attach_prepare(&state
, domain
);
119 mutex_unlock(&arm_smmu_asid_lock
);
123 arm_smmu_make_nested_domain_ste(&ste
, master
, nested_domain
,
125 arm_smmu_install_ste_for_dev(master
, &ste
);
126 arm_smmu_attach_commit(&state
);
127 mutex_unlock(&arm_smmu_asid_lock
);
/*
 * Free a nested domain. The iommu_domain is embedded in the
 * arm_smmu_nested_domain, so the containing object is what gets kfree()d.
 */
static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
{
	kfree(to_smmu_nested_domain(domain));
}
136 static const struct iommu_domain_ops arm_smmu_nested_ops
= {
137 .attach_dev
= arm_smmu_attach_dev_nested
,
138 .free
= arm_smmu_domain_nested_free
,
141 static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3
*arg
,
147 if (!(arg
->ste
[0] & cpu_to_le64(STRTAB_STE_0_V
))) {
148 memset(arg
->ste
, 0, sizeof(arg
->ste
));
152 /* EIO is reserved for invalid STE data. */
153 if ((arg
->ste
[0] & ~STRTAB_STE_0_NESTING_ALLOWED
) ||
154 (arg
->ste
[1] & ~STRTAB_STE_1_NESTING_ALLOWED
))
157 cfg
= FIELD_GET(STRTAB_STE_0_CFG
, le64_to_cpu(arg
->ste
[0]));
158 if (cfg
!= STRTAB_STE_0_CFG_ABORT
&& cfg
!= STRTAB_STE_0_CFG_BYPASS
&&
159 cfg
!= STRTAB_STE_0_CFG_S1_TRANS
)
163 * Only Full ATS or ATS UR is supported
164 * The EATS field will be set by arm_smmu_make_nested_domain_ste()
166 eats
= FIELD_GET(STRTAB_STE_1_EATS
, le64_to_cpu(arg
->ste
[1]));
167 arg
->ste
[1] &= ~cpu_to_le64(STRTAB_STE_1_EATS
);
168 if (eats
!= STRTAB_STE_1_EATS_ABT
&& eats
!= STRTAB_STE_1_EATS_TRANS
)
171 if (cfg
== STRTAB_STE_0_CFG_S1_TRANS
)
172 *enable_ats
= (eats
== STRTAB_STE_1_EATS_TRANS
);
176 static struct iommu_domain
*
177 arm_vsmmu_alloc_domain_nested(struct iommufd_viommu
*viommu
, u32 flags
,
178 const struct iommu_user_data
*user_data
)
180 struct arm_vsmmu
*vsmmu
= container_of(viommu
, struct arm_vsmmu
, core
);
181 const u32 SUPPORTED_FLAGS
= IOMMU_HWPT_FAULT_ID_VALID
;
182 struct arm_smmu_nested_domain
*nested_domain
;
183 struct iommu_hwpt_arm_smmuv3 arg
;
184 bool enable_ats
= false;
188 * Faults delivered to the nested domain are faults that originated by
189 * the S1 in the domain. The core code will match all PASIDs when
190 * delivering the fault due to user_pasid_table
192 if (flags
& ~SUPPORTED_FLAGS
)
193 return ERR_PTR(-EOPNOTSUPP
);
195 ret
= iommu_copy_struct_from_user(&arg
, user_data
,
196 IOMMU_HWPT_DATA_ARM_SMMUV3
, ste
);
200 ret
= arm_smmu_validate_vste(&arg
, &enable_ats
);
204 nested_domain
= kzalloc(sizeof(*nested_domain
), GFP_KERNEL_ACCOUNT
);
206 return ERR_PTR(-ENOMEM
);
208 nested_domain
->domain
.type
= IOMMU_DOMAIN_NESTED
;
209 nested_domain
->domain
.ops
= &arm_smmu_nested_ops
;
210 nested_domain
->enable_ats
= enable_ats
;
211 nested_domain
->vsmmu
= vsmmu
;
212 nested_domain
->ste
[0] = arg
.ste
[0];
213 nested_domain
->ste
[1] = arg
.ste
[1] & ~cpu_to_le64(STRTAB_STE_1_EATS
);
215 return &nested_domain
->domain
;
218 static int arm_vsmmu_vsid_to_sid(struct arm_vsmmu
*vsmmu
, u32 vsid
, u32
*sid
)
220 struct arm_smmu_master
*master
;
224 xa_lock(&vsmmu
->core
.vdevs
);
225 dev
= iommufd_viommu_find_dev(&vsmmu
->core
, (unsigned long)vsid
);
230 master
= dev_iommu_priv_get(dev
);
232 /* At this moment, iommufd only supports PCI device that has one SID */
234 *sid
= master
->streams
[0].id
;
236 xa_unlock(&vsmmu
->core
.vdevs
);
240 /* This is basically iommu_viommu_arm_smmuv3_invalidate in u64 for conversion */
241 struct arm_vsmmu_invalidation_cmd
{
244 struct iommu_viommu_arm_smmuv3_invalidate ucmd
;
249 * Convert, in place, the raw invalidation command into an internal format that
250 * can be passed to arm_smmu_cmdq_issue_cmdlist(). Internally commands are
251 * stored in CPU endian.
253 * Enforce the VMID or SID on the command.
255 static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu
*vsmmu
,
256 struct arm_vsmmu_invalidation_cmd
*cmd
)
258 /* Commands are le64 stored in u64 */
259 cmd
->cmd
[0] = le64_to_cpu(cmd
->ucmd
.cmd
[0]);
260 cmd
->cmd
[1] = le64_to_cpu(cmd
->ucmd
.cmd
[1]);
262 switch (cmd
->cmd
[0] & CMDQ_0_OP
) {
263 case CMDQ_OP_TLBI_NSNH_ALL
:
264 /* Convert to NH_ALL */
265 cmd
->cmd
[0] = CMDQ_OP_TLBI_NH_ALL
|
266 FIELD_PREP(CMDQ_TLBI_0_VMID
, vsmmu
->vmid
);
269 case CMDQ_OP_TLBI_NH_VA
:
270 case CMDQ_OP_TLBI_NH_VAA
:
271 case CMDQ_OP_TLBI_NH_ALL
:
272 case CMDQ_OP_TLBI_NH_ASID
:
273 cmd
->cmd
[0] &= ~CMDQ_TLBI_0_VMID
;
274 cmd
->cmd
[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID
, vsmmu
->vmid
);
276 case CMDQ_OP_ATC_INV
:
277 case CMDQ_OP_CFGI_CD
:
278 case CMDQ_OP_CFGI_CD_ALL
: {
279 u32 sid
, vsid
= FIELD_GET(CMDQ_CFGI_0_SID
, cmd
->cmd
[0]);
281 if (arm_vsmmu_vsid_to_sid(vsmmu
, vsid
, &sid
))
283 cmd
->cmd
[0] &= ~CMDQ_CFGI_0_SID
;
284 cmd
->cmd
[0] |= FIELD_PREP(CMDQ_CFGI_0_SID
, sid
);
293 static int arm_vsmmu_cache_invalidate(struct iommufd_viommu
*viommu
,
294 struct iommu_user_data_array
*array
)
296 struct arm_vsmmu
*vsmmu
= container_of(viommu
, struct arm_vsmmu
, core
);
297 struct arm_smmu_device
*smmu
= vsmmu
->smmu
;
298 struct arm_vsmmu_invalidation_cmd
*last
;
299 struct arm_vsmmu_invalidation_cmd
*cmds
;
300 struct arm_vsmmu_invalidation_cmd
*cur
;
301 struct arm_vsmmu_invalidation_cmd
*end
;
304 cmds
= kcalloc(array
->entry_num
, sizeof(*cmds
), GFP_KERNEL
);
308 end
= cmds
+ array
->entry_num
;
310 static_assert(sizeof(*cmds
) == 2 * sizeof(u64
));
311 ret
= iommu_copy_struct_from_full_user_array(
312 cmds
, sizeof(*cmds
), array
,
313 IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3
);
319 ret
= arm_vsmmu_convert_user_cmd(vsmmu
, cur
);
323 /* FIXME work in blocks of CMDQ_BATCH_ENTRIES and copy each block? */
325 if (cur
!= end
&& (cur
- last
) != CMDQ_BATCH_ENTRIES
- 1)
328 /* FIXME always uses the main cmdq rather than trying to group by type */
329 ret
= arm_smmu_cmdq_issue_cmdlist(smmu
, &smmu
->cmdq
, last
->cmd
,
338 array
->entry_num
= cur
- cmds
;
343 static const struct iommufd_viommu_ops arm_vsmmu_ops
= {
344 .alloc_domain_nested
= arm_vsmmu_alloc_domain_nested
,
345 .cache_invalidate
= arm_vsmmu_cache_invalidate
,
348 struct iommufd_viommu
*arm_vsmmu_alloc(struct device
*dev
,
349 struct iommu_domain
*parent
,
350 struct iommufd_ctx
*ictx
,
351 unsigned int viommu_type
)
353 struct arm_smmu_device
*smmu
=
354 iommu_get_iommu_dev(dev
, struct arm_smmu_device
, iommu
);
355 struct arm_smmu_master
*master
= dev_iommu_priv_get(dev
);
356 struct arm_smmu_domain
*s2_parent
= to_smmu_domain(parent
);
357 struct arm_vsmmu
*vsmmu
;
359 if (viommu_type
!= IOMMU_VIOMMU_TYPE_ARM_SMMUV3
)
360 return ERR_PTR(-EOPNOTSUPP
);
362 if (!(smmu
->features
& ARM_SMMU_FEAT_NESTING
))
363 return ERR_PTR(-EOPNOTSUPP
);
365 if (s2_parent
->smmu
!= master
->smmu
)
366 return ERR_PTR(-EINVAL
);
369 * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
370 * defect is needed to determine if arm_vsmmu_cache_invalidate() needs
371 * any change to remove this.
373 if (WARN_ON(smmu
->options
& ARM_SMMU_OPT_CMDQ_FORCE_SYNC
))
374 return ERR_PTR(-EOPNOTSUPP
);
377 * Must support some way to prevent the VM from bypassing the cache
378 * because VFIO currently does not do any cache maintenance. canwbs
379 * indicates the device is fully coherent and no cache maintenance is
380 * ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
381 * things non-coherent using the memattr, but No-Snoop behavior is not
384 if (!arm_smmu_master_canwbs(master
) &&
385 !(smmu
->features
& ARM_SMMU_FEAT_S2FWB
))
386 return ERR_PTR(-EOPNOTSUPP
);
388 vsmmu
= iommufd_viommu_alloc(ictx
, struct arm_vsmmu
, core
,
391 return ERR_CAST(vsmmu
);
394 vsmmu
->s2_parent
= s2_parent
;
395 /* FIXME Move VMID allocation from the S2 domain allocation to here */
396 vsmmu
->vmid
= s2_parent
->s2_cfg
.vmid
;
/*
 * Import the IOMMUFD symbol namespace; presumably required by the iommufd
 * helpers this file calls (e.g. iommufd_viommu_find_dev()) - confirm against
 * their export declarations.
 */
MODULE_IMPORT_NS(IOMMUFD);