// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>

#include <linux/amba/bus.h>
#include <linux/fsl/mc.h>

#include "arm-smmu.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1
#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
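
/*
 * Illustrative usage (added note, not from the original source): both
 * parameters can be set on the kernel command line via the usual
 * module-parameter syntax, e.g.
 *
 *	arm-smmu.force_stage=2 arm-smmu.disable_bypass=0
 *
 * which forces stage-2-only mappings and lets unmatched streams bypass the
 * SMMU instead of aborting (see the sGFSR_USF handling further down).
 */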
struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}
struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define cfg_smendx(cfg, fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : cfg->smendx[i])
#define for_each_cfg_sme(cfg, fw, i, idx) \
	for (i = 0; idx = cfg_smendx(cfg, fw, i), i < fw->num_ids; ++i)
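
/*
 * Descriptive note (added for clarity): for_each_cfg_sme() walks every
 * firmware-provided stream ID of a master, yielding both the index 'i' into
 * fwspec->ids[] and the corresponding stream-map entry 'idx' taken from
 * cfg->smendx[] (INVALID_SMENDX for IDs beyond fw->num_ids). Callers such as
 * arm_smmu_master_alloc_smes() rely on this to program one SMR/S2CR pair per
 * stream ID.
 */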
static bool using_legacy_binding, using_generic_binding;
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_get_sync(smmu->dev);

	return 0;
}
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put_autosuspend(smmu->dev);
}
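
/*
 * Descriptive note (added for clarity): register access paths below bracket
 * hardware touches with arm_smmu_rpm_get()/arm_smmu_rpm_put(), which collapse
 * to no-ops when runtime PM is not enabled for the SMMU device. The put side
 * uses pm_runtime_put_autosuspend() so bursts of accesses share a single
 * resume/suspend cycle (see the autosuspend delay set in
 * arm_smmu_attach_dev()).
 */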
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;
#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
static int arm_smmu_bus_init(struct iommu_ops *ops);
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}
static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", -1)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
/*
 * With the legacy DT binding in play, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no probe_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		return arm_smmu_bus_init(&arm_smmu_ops);
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
#else
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
#endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				int sync, int status)
{
	unsigned int spin_cnt, delay;
	u32 reg;

	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
		return smmu->impl->tlb_sync(smmu, page, sync, status);

	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			reg = arm_smmu_readl(smmu, page, status);
			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
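
/*
 * Descriptive note (added for clarity): the sync poll above spins
 * TLB_SPIN_COUNT times between udelay()s whose length doubles on each outer
 * iteration, so the total wait is bounded by roughly TLB_LOOP_TIMEOUT
 * microseconds (about one second) before the "may be deadlocked" warning
 * fires.
 */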
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
			    ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}
static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}
static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	/*
	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
	 * current CPU are visible beforehand.
	 */
	wmb();
	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
	arm_smmu_tlb_sync_context(smmu_domain);
}
static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	/* See above for why the write must be ordered after PTE updates */
	wmb();
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int idx = cfg->cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
		iova = (iova >> 12) << 12;
		iova |= cfg->asid;
		do {
			arm_smmu_cb_write(smmu, idx, reg, iova);
			iova += granule;
		} while (size -= granule);
	} else {
		iova >>= 12;
		iova |= (u64)cfg->asid << 48;
		do {
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
			iova += granule >> 12;
		} while (size -= granule);
	}
}
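
/*
 * Descriptive note (added for clarity): for the AArch32 formats the ASID is
 * carried in the low bits of the TLBIVA address (hence the 4K alignment
 * above), whereas for ARM_SMMU_CTX_FMT_AARCH64 the address is supplied in
 * units of 4K pages with the ASID packed into bits [63:48] of the 64-bit
 * register write.
 */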
static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	iova >>= 12;
	do {
		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
		else
			arm_smmu_cb_write(smmu, idx, reg, iova);
		iova += granule >> 12;
	} while (size -= granule);
}
static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVA);
	arm_smmu_tlb_sync_context(cookie);
}
static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
	arm_smmu_tlb_sync_context(cookie);
}
static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
}
static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2);
	arm_smmu_tlb_sync_context(cookie);
}
static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
	arm_smmu_tlb_sync_context(cookie);
}
static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
}
static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
				       size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
 * think.
 */
static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
					unsigned long iova, size_t granule,
					void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_any_s2_v1,
	.tlb_flush_leaf	= arm_smmu_tlb_inv_any_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};
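
/*
 * Descriptive note (added for clarity): stage 1 and SMMUv2 stage 2 invalidate
 * by VA/IPA, while the v1 stage-2 table falls back to whole-VMID invalidation
 * (TLBIVMID) because per-IPA operations are not available; that is why both
 * tlb_flush_walk and tlb_flush_leaf point at arm_smmu_tlb_inv_any_s2_v1
 * there.
 */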
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
	return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	if (__ratelimit(&rs)) {
		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
		    (gfsr & ARM_SMMU_sGFSR_USF))
			dev_err(smmu->dev,
				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
				(u16)gfsynr1);
		else
			dev_err(smmu->dev,
				"Unexpected global fault, this could be serious\n");
		dev_err(smmu->dev,
			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			gfsr, gfsynr0, gfsynr1, gfsynr2);
	}

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
			else
				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
		}
	} else {
		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
			cb->ttbr[1] = 0;
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
			cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						  cfg->asid);
			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						 cfg->asid);
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
		return;
	}

	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = ARM_SMMU_CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);

		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
	}

	/* CBAR */
	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
	}
	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
	} else {
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		if (stage1)
			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
					   cb->ttbr[1]);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
	}

	/* SCTLR */
	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
	if (stage1)
		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= ARM_SMMU_SCTLR_E;

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}
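
/*
 * Descriptive note (added for clarity): the programming order above matters.
 * CBA2R/CBAR select the format and VMID, TCR2/TCR must be written before the
 * TTBRs because they control how fields such as ASID[15:8] are interpreted,
 * and SCTLR is written last since it carries the M bit that actually enables
 * translation for the context bank.
 */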
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1;
	else
		cfg->asid = cfg->cbndx;

	smmu_domain->smmu = smmu;
	if (smmu->impl && smmu->impl->init_context) {
		ret = smmu->impl->init_context(smmu_domain);
		if (ret)
			goto out_clear_smmu;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
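
/*
 * Descriptive note (added for clarity): per-domain resources are derived from
 * the context bank index: cbndx comes from the context_map bitmap, the
 * stage-2 VMID is cbndx + 1 (so VMID 0 is never handed out) and the stage-1
 * ASID simply reuses cbndx, which avoids any separate ASID/VMID allocators.
 */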
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= ARM_SMMU_SMR_VALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
		  FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
		  FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= ARM_SMMU_S2CR_EXIDVALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}
static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	u32 smr;
	int i;

	if (!smmu->smrs)
		return;
	/*
	 * If we've had to accommodate firmware memory regions, we may
	 * have live SMRs by now; tread carefully...
	 *
	 * Somewhat perversely, not having a free SMR for this test implies we
	 * can get away without it anyway, as we'll only be able to 'allocate'
	 * these SMRs for the ID/mask values we're already trusting to be OK.
	 */
	for (i = 0; i < smmu->num_mapping_groups; i++)
		if (!smmu->smrs[i].valid)
			goto smr_ok;
	return;
smr_ok:
	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);

	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
}
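
/*
 * Descriptive note (added for clarity): the implemented ID/mask widths are
 * discovered by writing all-ones patterns to a free SMR and reading back
 * which bits stuck; the results (streamid_mask, smr_mask_mask) are later used
 * to reject firmware-supplied stream IDs or masks that the hardware cannot
 * actually express.
 */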
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
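
/*
 * Illustrative example (added, not from the original source): an existing
 * valid entry with id = 0x400 and mask = 0x00ff covers stream IDs
 * 0x400-0x4ff. A request for id = 0x420, mask = 0x000f is entirely contained
 * within it and reuses that entry, whereas a request for id = 0x4f0,
 * mask = 0x1f00 merely overlaps it and is rejected with -EINVAL.
 */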
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}
*dev
)
1061 struct iommu_fwspec
*fwspec
= dev_iommu_fwspec_get(dev
);
1062 struct arm_smmu_master_cfg
*cfg
= dev_iommu_priv_get(dev
);
1063 struct arm_smmu_device
*smmu
= cfg
->smmu
;
1064 struct arm_smmu_smr
*smrs
= smmu
->smrs
;
1067 mutex_lock(&smmu
->stream_map_mutex
);
1068 /* Figure out a viable stream map entry allocation */
1069 for_each_cfg_sme(cfg
, fwspec
, i
, idx
) {
1070 u16 sid
= FIELD_GET(ARM_SMMU_SMR_ID
, fwspec
->ids
[i
]);
1071 u16 mask
= FIELD_GET(ARM_SMMU_SMR_MASK
, fwspec
->ids
[i
]);
1073 if (idx
!= INVALID_SMENDX
) {
1078 ret
= arm_smmu_find_sme(smmu
, sid
, mask
);
1083 if (smrs
&& smmu
->s2crs
[idx
].count
== 0) {
1085 smrs
[idx
].mask
= mask
;
1086 smrs
[idx
].valid
= true;
1088 smmu
->s2crs
[idx
].count
++;
1089 cfg
->smendx
[i
] = (s16
)idx
;
1092 /* It worked! Now, poke the actual hardware */
1093 for_each_cfg_sme(cfg
, fwspec
, i
, idx
)
1094 arm_smmu_write_sme(smmu
, idx
);
1096 mutex_unlock(&smmu
->stream_map_mutex
);
1101 arm_smmu_free_sme(smmu
, cfg
->smendx
[i
]);
1102 cfg
->smendx
[i
] = INVALID_SMENDX
;
1104 mutex_unlock(&smmu
->stream_map_mutex
);
static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = cfg->smmu;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}
static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_master_cfg *cfg,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and probe_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	cfg = dev_iommu_priv_get(dev);
	if (!cfg)
		return -ENODEV;

	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);

	/*
	 * Setup an autosuspend delay to avoid bouncing runpm state.
	 * Otherwise, if a driver for a suspended consumer device
	 * unmaps buffers, it will runpm resume/suspend for each one.
	 *
	 * For example, when used by a GPU device, when an application
	 * or game exits, it can trigger unmapping 100s or 1000s of
	 * buffers. With a runpm cycle for each buffer, that adds up
	 * to 5-10sec worth of reprogramming the context bank, while
	 * the system appears to be locked up to the user.
	 */
	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
	pm_runtime_use_autosuspend(smmu->dev);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map(ops, iova, paddr, size, prot);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size, struct iommu_iotlb_gather *gather)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap(ops, iova, size, gather);
	arm_smmu_rpm_put(smmu);

	return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->flush_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (!smmu)
		return;

	arm_smmu_rpm_get(smmu);
	if (smmu->version == ARM_SMMU_V2 ||
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		arm_smmu_tlb_sync_context(smmu_domain);
	else
		arm_smmu_tlb_sync_global(smmu);
	arm_smmu_rpm_put(smmu);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	va = iova & ~0xfffUL;
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
				      5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & ARM_SMMU_CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	arm_smmu_rpm_put(smmu);

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
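
/*
 * Descriptive note (added for clarity): this "hard" lookup drives the
 * hardware address translation operation: the page-aligned VA is written to
 * ATS1PR, ATSR is polled until the operation completes, and the result (or a
 * fault indication) is read back from PAR. On timeout the code falls back to
 * walking the software page tables via ops->iova_to_phys().
 */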
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}
static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return ERR_PTR(-ENODEV);
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	dev_iommu_priv_set(dev, cfg);
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return &smmu->iommu;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ERR_PTR(ret);
}
static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg = dev_iommu_priv_get(dev);
	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	arm_smmu_master_free_smes(cfg, fwspec);

	arm_smmu_rpm_put(smmu);

	dev_iommu_priv_set(dev, NULL);
	kfree(cfg);
	iommu_fwspec_free(dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = cfg->smmu;
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	/* Remember group for faster lookups */
	if (!IS_ERR(group))
		for_each_cfg_sme(cfg, fwspec, i, idx)
			smmu->s2crs[idx].group = group;

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
	default:
		return -EINVAL;
	}
}
static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	default:
		ret = -EINVAL;
	}
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);

	if (args->args_count > 1)
		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
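
/*
 * Illustrative device-tree fragment (added, not from the original source):
 * with the generic binding a master references the SMMU as
 *
 *	iommus = <&smmu 0x400 0x3f>;
 *
 * where the first cell becomes the SMR ID and the optional second cell the
 * SMR mask, both packed into a single fwspec ID by arm_smmu_of_xlate() above.
 */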
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}
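
/*
 * Descriptive note (added for clarity): the SW_MSI window advertised here is
 * the fixed 1MB IOVA range starting at MSI_IOVA_BASE (0x8000000); the DMA
 * layer uses it to map MSI doorbells for masters behind the SMMU.
 */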
static int arm_smmu_def_domain_type(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	const struct arm_smmu_impl *impl = cfg->smmu->impl;

	if (impl && impl->def_domain_type)
		return impl->def_domain_type(dev);

	return 0;
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int i;
	u32 reg;

	/* clear global FSR */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/* Make sure all context banks are disabled and clear CB_FSR */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
	}

	/* Invalidate the TLB, just in case */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);

	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= ARM_SMMU_sCR0_USFCFG;
	else
		reg &= ~ARM_SMMU_sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~ARM_SMMU_sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(ARM_SMMU_sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= ARM_SMMU_sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= ARM_SMMU_sCR0_EXIDENABLE;

	if (smmu->impl && smmu->impl->reset)
		smmu->impl->reset(smmu);

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}
static int arm_smmu_id_size_to_bits(int size)
{
	/* Translate the ID2 IAS/OAS field encoding into an address width. */
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned int size;
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);

	if (id & ARM_SMMU_ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ARM_SMMU_ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ARM_SMMU_ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ARM_SMMU_ID0_S1TS) &&
	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ARM_SMMU_ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %u register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 ||
	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ARM_SMMU_ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ARM_SMMU_ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ARM_SMMU_ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ARM_SMMU_ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);


	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	if (smmu->impl && smmu->impl->cfg_probe)
		return smmu->impl->cfg_probe(smmu);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ }
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}
*pdev
,
1989 struct arm_smmu_device
*smmu
)
1991 struct device
*dev
= smmu
->dev
;
1992 struct acpi_iort_node
*node
=
1993 *(struct acpi_iort_node
**)dev_get_platdata(dev
);
1994 struct acpi_iort_smmu
*iort_smmu
;
1997 /* Retrieve SMMU1/2 specific data */
1998 iort_smmu
= (struct acpi_iort_smmu
*)node
->node_data
;
2000 ret
= acpi_smmu_get_data(iort_smmu
->model
, smmu
);
2004 /* Ignore the configuration access interrupt */
2005 smmu
->num_global_irqs
= 1;
2007 if (iort_smmu
->flags
& ACPI_IORT_SMMU_COHERENT_WALK
)
2008 smmu
->features
|= ARM_SMMU_FEAT_COHERENT_WALK
;
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding) {
			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
		}
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
static int arm_smmu_bus_init(struct iommu_ops *ops)
{
	int err;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type)) {
		err = bus_set_iommu(&platform_bus_type, ops);
		if (err)
			return err;
	}
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype)) {
		err = bus_set_iommu(&amba_bustype, ops);
		if (err)
			goto err_reset_platform_ops;
	}
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		err = bus_set_iommu(&pci_bus_type, ops);
		if (err)
			goto err_reset_amba_ops;
	}
#endif
#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type)) {
		err = bus_set_iommu(&fsl_mc_bus_type, ops);
		if (err)
			goto err_reset_pci_ops;
	}
#endif
	return 0;

err_reset_pci_ops: __maybe_unused;
#ifdef CONFIG_PCI
	bus_set_iommu(&pci_bus_type, NULL);
#endif
err_reset_amba_ops: __maybe_unused;
#ifdef CONFIG_ARM_AMBA
	bus_set_iommu(&amba_bustype, NULL);
#endif
err_reset_platform_ops: __maybe_unused;
	bus_set_iommu(&platform_bus_type, NULL);
	return err;
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);
	if (err)
		return err;

	smmu = arm_smmu_impl_init(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return -ENODEV;
		smmu->irqs[i] = irq;
	}

	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
				"found only %d context irq(s) but %d required\n",
				smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		return arm_smmu_bus_init(&arm_smmu_ops);

	return 0;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_notice(&pdev->dev, "disabling translation\n");

	arm_smmu_bus_init(NULL);
	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_resume(dev);
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	if (pm_runtime_suspended(dev))
		return 0;

	return arm_smmu_runtime_suspend(dev);
}
static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};
static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);
MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");