Revert "tty: hvc: Fix data abort due to race in hvc_open"
[linux/fpc-iii.git] / drivers / iommu / arm-smmu.c
bloba6a5796e9c41f2ca371c9b2a2b38093234fbe59e
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * IOMMU API for ARM architected SMMU implementations.
5 * Copyright (C) 2013 ARM Limited
7 * Author: Will Deacon <will.deacon@arm.com>
9 * This driver currently supports:
10 * - SMMUv1 and v2 implementations
11 * - Stream-matching and stream-indexing
12 * - v7/v8 long-descriptor format
13 * - Non-secure access to the SMMU
14 * - Context fault reporting
15 * - Extended Stream ID (16 bit)
18 #define pr_fmt(fmt) "arm-smmu: " fmt
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/of_iommu.h>
35 #include <linux/pci.h>
36 #include <linux/platform_device.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/ratelimit.h>
39 #include <linux/slab.h>
41 #include <linux/amba/bus.h>
42 #include <linux/fsl/mc.h>
44 #include "arm-smmu.h"
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 *
 * The value is parenthesised so that the unary minus cannot bind to
 * whatever expression the macro happens to be expanded into.
 */
#define QCOM_DUMMY_VAL			(-1)

/* Upper bound for the TLB sync back-off delay, and polls per back-off step */
#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
61 static int force_stage;
62 module_param(force_stage, int, S_IRUGO);
63 MODULE_PARM_DESC(force_stage,
64 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
65 static bool disable_bypass =
66 IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
67 module_param(disable_bypass, bool, S_IRUGO);
68 MODULE_PARM_DESC(disable_bypass,
69 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
71 struct arm_smmu_s2cr {
72 struct iommu_group *group;
73 int count;
74 enum arm_smmu_s2cr_type type;
75 enum arm_smmu_s2cr_privcfg privcfg;
76 u8 cbndx;
79 #define s2cr_init_val (struct arm_smmu_s2cr){ \
80 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
83 struct arm_smmu_smr {
84 u16 mask;
85 u16 id;
86 bool valid;
89 struct arm_smmu_cb {
90 u64 ttbr[2];
91 u32 tcr[2];
92 u32 mair[2];
93 struct arm_smmu_cfg *cfg;
96 struct arm_smmu_master_cfg {
97 struct arm_smmu_device *smmu;
98 s16 smendx[];
100 #define INVALID_SMENDX -1
101 #define cfg_smendx(cfg, fw, i) \
102 (i >= fw->num_ids ? INVALID_SMENDX : cfg->smendx[i])
103 #define for_each_cfg_sme(cfg, fw, i, idx) \
104 for (i = 0; idx = cfg_smendx(cfg, fw, i), i < fw->num_ids; ++i)
106 static bool using_legacy_binding, using_generic_binding;
108 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
110 if (pm_runtime_enabled(smmu->dev))
111 return pm_runtime_get_sync(smmu->dev);
113 return 0;
116 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
118 if (pm_runtime_enabled(smmu->dev))
119 pm_runtime_put_autosuspend(smmu->dev);
122 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
124 return container_of(dom, struct arm_smmu_domain, domain);
127 static struct platform_driver arm_smmu_driver;
128 static struct iommu_ops arm_smmu_ops;
130 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
131 static int arm_smmu_bus_init(struct iommu_ops *ops);
133 static struct device_node *dev_get_dev_node(struct device *dev)
135 if (dev_is_pci(dev)) {
136 struct pci_bus *bus = to_pci_dev(dev)->bus;
138 while (!pci_is_root_bus(bus))
139 bus = bus->parent;
140 return of_node_get(bus->bridge->parent->of_node);
143 return of_node_get(dev->of_node);
146 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
148 *((__be32 *)data) = cpu_to_be32(alias);
149 return 0; /* Continue walking */
152 static int __find_legacy_master_phandle(struct device *dev, void *data)
154 struct of_phandle_iterator *it = *(void **)data;
155 struct device_node *np = it->node;
156 int err;
158 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
159 "#stream-id-cells", -1)
160 if (it->node == np) {
161 *(void **)data = dev;
162 return 1;
164 it->node = np;
165 return err == -ENOENT ? 0 : err;
168 static int arm_smmu_register_legacy_master(struct device *dev,
169 struct arm_smmu_device **smmu)
171 struct device *smmu_dev;
172 struct device_node *np;
173 struct of_phandle_iterator it;
174 void *data = &it;
175 u32 *sids;
176 __be32 pci_sid;
177 int err;
179 np = dev_get_dev_node(dev);
180 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
181 of_node_put(np);
182 return -ENODEV;
185 it.node = np;
186 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
187 __find_legacy_master_phandle);
188 smmu_dev = data;
189 of_node_put(np);
190 if (err == 0)
191 return -ENODEV;
192 if (err < 0)
193 return err;
195 if (dev_is_pci(dev)) {
196 /* "mmu-masters" assumes Stream ID == Requester ID */
197 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
198 &pci_sid);
199 it.cur = &pci_sid;
200 it.cur_count = 1;
203 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
204 &arm_smmu_ops);
205 if (err)
206 return err;
208 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
209 if (!sids)
210 return -ENOMEM;
212 *smmu = dev_get_drvdata(smmu_dev);
213 of_phandle_iterator_args(&it, sids, it.cur_count);
214 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
215 kfree(sids);
216 return err;
220 * With the legacy DT binding in play, we have no guarantees about
221 * probe order, but then we're also not doing default domains, so we can
222 * delay setting bus ops until we're sure every possible SMMU is ready,
223 * and that way ensure that no add_device() calls get missed.
225 static int arm_smmu_legacy_bus_init(void)
227 if (using_legacy_binding)
228 return arm_smmu_bus_init(&arm_smmu_ops);
229 return 0;
231 device_initcall_sync(arm_smmu_legacy_bus_init);
232 #else
233 static int arm_smmu_register_legacy_master(struct device *dev,
234 struct arm_smmu_device **smmu)
236 return -ENODEV;
238 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
240 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
242 int idx;
244 do {
245 idx = find_next_zero_bit(map, end, start);
246 if (idx == end)
247 return -ENOSPC;
248 } while (test_and_set_bit(idx, map));
250 return idx;
/* Release bit @idx of @map, previously claimed by __arm_smmu_alloc_bitmap(). */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
258 /* Wait for any pending TLB invalidations to complete */
259 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
260 int sync, int status)
262 unsigned int spin_cnt, delay;
263 u32 reg;
265 if (smmu->impl && unlikely(smmu->impl->tlb_sync))
266 return smmu->impl->tlb_sync(smmu, page, sync, status);
268 arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
269 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
270 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
271 reg = arm_smmu_readl(smmu, page, status);
272 if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
273 return;
274 cpu_relax();
276 udelay(delay);
278 dev_err_ratelimited(smmu->dev,
279 "TLB sync timed out -- SMMU may be deadlocked\n");
282 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
284 unsigned long flags;
286 spin_lock_irqsave(&smmu->global_sync_lock, flags);
287 __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
288 ARM_SMMU_GR0_sTLBGSTATUS);
289 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
292 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
294 struct arm_smmu_device *smmu = smmu_domain->smmu;
295 unsigned long flags;
297 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
298 __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
299 ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
300 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
303 static void arm_smmu_tlb_inv_context_s1(void *cookie)
305 struct arm_smmu_domain *smmu_domain = cookie;
307 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
308 * current CPU are visible beforehand.
310 wmb();
311 arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
312 ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
313 arm_smmu_tlb_sync_context(smmu_domain);
316 static void arm_smmu_tlb_inv_context_s2(void *cookie)
318 struct arm_smmu_domain *smmu_domain = cookie;
319 struct arm_smmu_device *smmu = smmu_domain->smmu;
321 /* See above */
322 wmb();
323 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
324 arm_smmu_tlb_sync_global(smmu);
327 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
328 size_t granule, void *cookie, int reg)
330 struct arm_smmu_domain *smmu_domain = cookie;
331 struct arm_smmu_device *smmu = smmu_domain->smmu;
332 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
333 int idx = cfg->cbndx;
335 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
336 wmb();
338 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
339 iova = (iova >> 12) << 12;
340 iova |= cfg->asid;
341 do {
342 arm_smmu_cb_write(smmu, idx, reg, iova);
343 iova += granule;
344 } while (size -= granule);
345 } else {
346 iova >>= 12;
347 iova |= (u64)cfg->asid << 48;
348 do {
349 arm_smmu_cb_writeq(smmu, idx, reg, iova);
350 iova += granule >> 12;
351 } while (size -= granule);
355 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
356 size_t granule, void *cookie, int reg)
358 struct arm_smmu_domain *smmu_domain = cookie;
359 struct arm_smmu_device *smmu = smmu_domain->smmu;
360 int idx = smmu_domain->cfg.cbndx;
362 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
363 wmb();
365 iova >>= 12;
366 do {
367 if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
368 arm_smmu_cb_writeq(smmu, idx, reg, iova);
369 else
370 arm_smmu_cb_write(smmu, idx, reg, iova);
371 iova += granule >> 12;
372 } while (size -= granule);
375 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
376 size_t granule, void *cookie)
378 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
379 ARM_SMMU_CB_S1_TLBIVA);
380 arm_smmu_tlb_sync_context(cookie);
383 static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size,
384 size_t granule, void *cookie)
386 arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
387 ARM_SMMU_CB_S1_TLBIVAL);
388 arm_smmu_tlb_sync_context(cookie);
391 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
392 unsigned long iova, size_t granule,
393 void *cookie)
395 arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
396 ARM_SMMU_CB_S1_TLBIVAL);
399 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
400 size_t granule, void *cookie)
402 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
403 ARM_SMMU_CB_S2_TLBIIPAS2);
404 arm_smmu_tlb_sync_context(cookie);
407 static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size,
408 size_t granule, void *cookie)
410 arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
411 ARM_SMMU_CB_S2_TLBIIPAS2L);
412 arm_smmu_tlb_sync_context(cookie);
415 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
416 unsigned long iova, size_t granule,
417 void *cookie)
419 arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
420 ARM_SMMU_CB_S2_TLBIIPAS2L);
423 static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size,
424 size_t granule, void *cookie)
426 arm_smmu_tlb_inv_context_s2(cookie);
429 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
430 * almost negligible, but the benefit of getting the first one in as far ahead
431 * of the sync as possible is significant, hence we don't just make this a
432 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
433 * think.
435 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
436 unsigned long iova, size_t granule,
437 void *cookie)
439 struct arm_smmu_domain *smmu_domain = cookie;
440 struct arm_smmu_device *smmu = smmu_domain->smmu;
442 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
443 wmb();
445 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
448 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
449 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
450 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s1,
451 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s1,
452 .tlb_add_page = arm_smmu_tlb_add_page_s1,
455 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
456 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
457 .tlb_flush_walk = arm_smmu_tlb_inv_walk_s2,
458 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf_s2,
459 .tlb_add_page = arm_smmu_tlb_add_page_s2,
462 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
463 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
464 .tlb_flush_walk = arm_smmu_tlb_inv_any_s2_v1,
465 .tlb_flush_leaf = arm_smmu_tlb_inv_any_s2_v1,
466 .tlb_add_page = arm_smmu_tlb_add_page_s2_v1,
469 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
471 u32 fsr, fsynr, cbfrsynra;
472 unsigned long iova;
473 struct iommu_domain *domain = dev;
474 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
475 struct arm_smmu_device *smmu = smmu_domain->smmu;
476 int idx = smmu_domain->cfg.cbndx;
478 fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
479 if (!(fsr & ARM_SMMU_FSR_FAULT))
480 return IRQ_NONE;
482 fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
483 iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
484 cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
486 dev_err_ratelimited(smmu->dev,
487 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
488 fsr, iova, fsynr, cbfrsynra, idx);
490 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
491 return IRQ_HANDLED;
494 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
496 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
497 struct arm_smmu_device *smmu = dev;
498 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
499 DEFAULT_RATELIMIT_BURST);
501 gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
502 gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
503 gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
504 gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
506 if (!gfsr)
507 return IRQ_NONE;
509 if (__ratelimit(&rs)) {
510 if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
511 (gfsr & ARM_SMMU_sGFSR_USF))
512 dev_err(smmu->dev,
513 "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
514 (u16)gfsynr1);
515 else
516 dev_err(smmu->dev,
517 "Unexpected global fault, this could be serious\n");
518 dev_err(smmu->dev,
519 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
520 gfsr, gfsynr0, gfsynr1, gfsynr2);
523 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
524 return IRQ_HANDLED;
527 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
528 struct io_pgtable_cfg *pgtbl_cfg)
530 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
531 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
532 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
534 cb->cfg = cfg;
536 /* TCR */
537 if (stage1) {
538 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
539 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
540 } else {
541 cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
542 cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
543 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
544 cb->tcr[1] |= ARM_SMMU_TCR2_AS;
545 else
546 cb->tcr[0] |= ARM_SMMU_TCR_EAE;
548 } else {
549 cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
552 /* TTBRs */
553 if (stage1) {
554 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
555 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
556 cb->ttbr[1] = 0;
557 } else {
558 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
559 cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID,
560 cfg->asid);
561 cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
562 cfg->asid);
564 } else {
565 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
568 /* MAIRs (stage-1 only) */
569 if (stage1) {
570 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
571 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
572 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
573 } else {
574 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
575 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
580 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
582 u32 reg;
583 bool stage1;
584 struct arm_smmu_cb *cb = &smmu->cbs[idx];
585 struct arm_smmu_cfg *cfg = cb->cfg;
587 /* Unassigned context banks only need disabling */
588 if (!cfg) {
589 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
590 return;
593 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
595 /* CBA2R */
596 if (smmu->version > ARM_SMMU_V1) {
597 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
598 reg = ARM_SMMU_CBA2R_VA64;
599 else
600 reg = 0;
601 /* 16-bit VMIDs live in CBA2R */
602 if (smmu->features & ARM_SMMU_FEAT_VMID16)
603 reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
605 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
608 /* CBAR */
609 reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
610 if (smmu->version < ARM_SMMU_V2)
611 reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
614 * Use the weakest shareability/memory types, so they are
615 * overridden by the ttbcr/pte.
617 if (stage1) {
618 reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
619 ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
620 FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
621 ARM_SMMU_CBAR_S1_MEMATTR_WB);
622 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
623 /* 8-bit VMIDs live in CBAR */
624 reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
626 arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
629 * TCR
630 * We must write this before the TTBRs, since it determines the
631 * access behaviour of some fields (in particular, ASID[15:8]).
633 if (stage1 && smmu->version > ARM_SMMU_V1)
634 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
635 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
637 /* TTBRs */
638 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
639 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
640 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
641 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
642 } else {
643 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
644 if (stage1)
645 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
646 cb->ttbr[1]);
649 /* MAIRs (stage-1 only) */
650 if (stage1) {
651 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
652 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
655 /* SCTLR */
656 reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
657 ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
658 if (stage1)
659 reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
660 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
661 reg |= ARM_SMMU_SCTLR_E;
663 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
666 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
667 struct arm_smmu_device *smmu)
669 int irq, start, ret = 0;
670 unsigned long ias, oas;
671 struct io_pgtable_ops *pgtbl_ops;
672 struct io_pgtable_cfg pgtbl_cfg;
673 enum io_pgtable_fmt fmt;
674 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
675 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
677 mutex_lock(&smmu_domain->init_mutex);
678 if (smmu_domain->smmu)
679 goto out_unlock;
681 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
682 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
683 smmu_domain->smmu = smmu;
684 goto out_unlock;
688 * Mapping the requested stage onto what we support is surprisingly
689 * complicated, mainly because the spec allows S1+S2 SMMUs without
690 * support for nested translation. That means we end up with the
691 * following table:
693 * Requested Supported Actual
694 * S1 N S1
695 * S1 S1+S2 S1
696 * S1 S2 S2
697 * S1 S1 S1
698 * N N N
699 * N S1+S2 S2
700 * N S2 S2
701 * N S1 S1
703 * Note that you can't actually request stage-2 mappings.
705 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
706 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
707 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
708 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
711 * Choosing a suitable context format is even more fiddly. Until we
712 * grow some way for the caller to express a preference, and/or move
713 * the decision into the io-pgtable code where it arguably belongs,
714 * just aim for the closest thing to the rest of the system, and hope
715 * that the hardware isn't esoteric enough that we can't assume AArch64
716 * support to be a superset of AArch32 support...
718 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
719 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
720 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
721 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
722 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
723 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
724 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
725 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
726 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
727 ARM_SMMU_FEAT_FMT_AARCH64_16K |
728 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
729 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
731 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
732 ret = -EINVAL;
733 goto out_unlock;
736 switch (smmu_domain->stage) {
737 case ARM_SMMU_DOMAIN_S1:
738 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
739 start = smmu->num_s2_context_banks;
740 ias = smmu->va_size;
741 oas = smmu->ipa_size;
742 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
743 fmt = ARM_64_LPAE_S1;
744 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
745 fmt = ARM_32_LPAE_S1;
746 ias = min(ias, 32UL);
747 oas = min(oas, 40UL);
748 } else {
749 fmt = ARM_V7S;
750 ias = min(ias, 32UL);
751 oas = min(oas, 32UL);
753 smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
754 break;
755 case ARM_SMMU_DOMAIN_NESTED:
757 * We will likely want to change this if/when KVM gets
758 * involved.
760 case ARM_SMMU_DOMAIN_S2:
761 cfg->cbar = CBAR_TYPE_S2_TRANS;
762 start = 0;
763 ias = smmu->ipa_size;
764 oas = smmu->pa_size;
765 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
766 fmt = ARM_64_LPAE_S2;
767 } else {
768 fmt = ARM_32_LPAE_S2;
769 ias = min(ias, 40UL);
770 oas = min(oas, 40UL);
772 if (smmu->version == ARM_SMMU_V2)
773 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
774 else
775 smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
776 break;
777 default:
778 ret = -EINVAL;
779 goto out_unlock;
781 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
782 smmu->num_context_banks);
783 if (ret < 0)
784 goto out_unlock;
786 cfg->cbndx = ret;
787 if (smmu->version < ARM_SMMU_V2) {
788 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
789 cfg->irptndx %= smmu->num_context_irqs;
790 } else {
791 cfg->irptndx = cfg->cbndx;
794 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
795 cfg->vmid = cfg->cbndx + 1;
796 else
797 cfg->asid = cfg->cbndx;
799 smmu_domain->smmu = smmu;
800 if (smmu->impl && smmu->impl->init_context) {
801 ret = smmu->impl->init_context(smmu_domain);
802 if (ret)
803 goto out_unlock;
806 pgtbl_cfg = (struct io_pgtable_cfg) {
807 .pgsize_bitmap = smmu->pgsize_bitmap,
808 .ias = ias,
809 .oas = oas,
810 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
811 .tlb = smmu_domain->flush_ops,
812 .iommu_dev = smmu->dev,
815 if (smmu_domain->non_strict)
816 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
818 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
819 if (!pgtbl_ops) {
820 ret = -ENOMEM;
821 goto out_clear_smmu;
824 /* Update the domain's page sizes to reflect the page table format */
825 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
826 domain->geometry.aperture_end = (1UL << ias) - 1;
827 domain->geometry.force_aperture = true;
829 /* Initialise the context bank with our page table cfg */
830 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
831 arm_smmu_write_context_bank(smmu, cfg->cbndx);
834 * Request context fault interrupt. Do this last to avoid the
835 * handler seeing a half-initialised domain state.
837 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
838 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
839 IRQF_SHARED, "arm-smmu-context-fault", domain);
840 if (ret < 0) {
841 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
842 cfg->irptndx, irq);
843 cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
846 mutex_unlock(&smmu_domain->init_mutex);
848 /* Publish page table ops for map/unmap */
849 smmu_domain->pgtbl_ops = pgtbl_ops;
850 return 0;
852 out_clear_smmu:
853 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
854 smmu_domain->smmu = NULL;
855 out_unlock:
856 mutex_unlock(&smmu_domain->init_mutex);
857 return ret;
860 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
862 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
863 struct arm_smmu_device *smmu = smmu_domain->smmu;
864 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
865 int ret, irq;
867 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
868 return;
870 ret = arm_smmu_rpm_get(smmu);
871 if (ret < 0)
872 return;
875 * Disable the context bank and free the page tables before freeing
876 * it.
878 smmu->cbs[cfg->cbndx].cfg = NULL;
879 arm_smmu_write_context_bank(smmu, cfg->cbndx);
881 if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
882 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
883 devm_free_irq(smmu->dev, irq, domain);
886 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
887 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
889 arm_smmu_rpm_put(smmu);
892 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
894 struct arm_smmu_domain *smmu_domain;
896 if (type != IOMMU_DOMAIN_UNMANAGED &&
897 type != IOMMU_DOMAIN_DMA &&
898 type != IOMMU_DOMAIN_IDENTITY)
899 return NULL;
901 * Allocate the domain and initialise some of its data structures.
902 * We can't really do anything meaningful until we've added a
903 * master.
905 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
906 if (!smmu_domain)
907 return NULL;
909 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
910 iommu_get_dma_cookie(&smmu_domain->domain))) {
911 kfree(smmu_domain);
912 return NULL;
915 mutex_init(&smmu_domain->init_mutex);
916 spin_lock_init(&smmu_domain->cb_lock);
918 return &smmu_domain->domain;
/*
 * Free the domain resources. We assume that all devices have
 * already been detached.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain;

	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);

	smmu_domain = to_smmu_domain(domain);
	kfree(smmu_domain);
}
934 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
936 struct arm_smmu_smr *smr = smmu->smrs + idx;
937 u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
938 FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
940 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
941 reg |= ARM_SMMU_SMR_VALID;
942 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
945 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
947 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
948 u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
949 FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
950 FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
952 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
953 smmu->smrs[idx].valid)
954 reg |= ARM_SMMU_S2CR_EXIDVALID;
955 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
958 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
960 arm_smmu_write_s2cr(smmu, idx);
961 if (smmu->smrs)
962 arm_smmu_write_smr(smmu, idx);
966 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
967 * should be called after sCR0 is written.
969 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
971 u32 smr;
972 int i;
974 if (!smmu->smrs)
975 return;
977 * If we've had to accommodate firmware memory regions, we may
978 * have live SMRs by now; tread carefully...
980 * Somewhat perversely, not having a free SMR for this test implies we
981 * can get away without it anyway, as we'll only be able to 'allocate'
982 * these SMRs for the ID/mask values we're already trusting to be OK.
984 for (i = 0; i < smmu->num_mapping_groups; i++)
985 if (!smmu->smrs[i].valid)
986 goto smr_ok;
987 return;
988 smr_ok:
990 * SMR.ID bits may not be preserved if the corresponding MASK
991 * bits are set, so check each one separately. We can reject
992 * masters later if they try to claim IDs outside these masks.
994 smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
995 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
996 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
997 smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
999 smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
1000 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
1001 smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
1002 smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
1005 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1007 struct arm_smmu_smr *smrs = smmu->smrs;
1008 int i, free_idx = -ENOSPC;
1010 /* Stream indexing is blissfully easy */
1011 if (!smrs)
1012 return id;
1014 /* Validating SMRs is... less so */
1015 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1016 if (!smrs[i].valid) {
1018 * Note the first free entry we come across, which
1019 * we'll claim in the end if nothing else matches.
1021 if (free_idx < 0)
1022 free_idx = i;
1023 continue;
1026 * If the new entry is _entirely_ matched by an existing entry,
1027 * then reuse that, with the guarantee that there also cannot
1028 * be any subsequent conflicting entries. In normal use we'd
1029 * expect simply identical entries for this case, but there's
1030 * no harm in accommodating the generalisation.
1032 if ((mask & smrs[i].mask) == mask &&
1033 !((id ^ smrs[i].id) & ~smrs[i].mask))
1034 return i;
1036 * If the new entry has any other overlap with an existing one,
1037 * though, then there always exists at least one stream ID
1038 * which would cause a conflict, and we can't allow that risk.
1040 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1041 return -EINVAL;
1044 return free_idx;
1047 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1049 if (--smmu->s2crs[idx].count)
1050 return false;
1052 smmu->s2crs[idx] = s2cr_init_val;
1053 if (smmu->smrs)
1054 smmu->smrs[idx].valid = false;
1056 return true;
1059 static int arm_smmu_master_alloc_smes(struct device *dev)
1061 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1062 struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1063 struct arm_smmu_device *smmu = cfg->smmu;
1064 struct arm_smmu_smr *smrs = smmu->smrs;
1065 struct iommu_group *group;
1066 int i, idx, ret;
1068 mutex_lock(&smmu->stream_map_mutex);
1069 /* Figure out a viable stream map entry allocation */
1070 for_each_cfg_sme(cfg, fwspec, i, idx) {
1071 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1072 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1074 if (idx != INVALID_SMENDX) {
1075 ret = -EEXIST;
1076 goto out_err;
1079 ret = arm_smmu_find_sme(smmu, sid, mask);
1080 if (ret < 0)
1081 goto out_err;
1083 idx = ret;
1084 if (smrs && smmu->s2crs[idx].count == 0) {
1085 smrs[idx].id = sid;
1086 smrs[idx].mask = mask;
1087 smrs[idx].valid = true;
1089 smmu->s2crs[idx].count++;
1090 cfg->smendx[i] = (s16)idx;
1093 group = iommu_group_get_for_dev(dev);
1094 if (IS_ERR(group)) {
1095 ret = PTR_ERR(group);
1096 goto out_err;
1098 iommu_group_put(group);
1100 /* It worked! Now, poke the actual hardware */
1101 for_each_cfg_sme(cfg, fwspec, i, idx) {
1102 arm_smmu_write_sme(smmu, idx);
1103 smmu->s2crs[idx].group = group;
1106 mutex_unlock(&smmu->stream_map_mutex);
1107 return 0;
1109 out_err:
1110 while (i--) {
1111 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1112 cfg->smendx[i] = INVALID_SMENDX;
1114 mutex_unlock(&smmu->stream_map_mutex);
1115 return ret;
1118 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1119 struct iommu_fwspec *fwspec)
1121 struct arm_smmu_device *smmu = cfg->smmu;
1122 int i, idx;
1124 mutex_lock(&smmu->stream_map_mutex);
1125 for_each_cfg_sme(cfg, fwspec, i, idx) {
1126 if (arm_smmu_free_sme(smmu, idx))
1127 arm_smmu_write_sme(smmu, idx);
1128 cfg->smendx[i] = INVALID_SMENDX;
1130 mutex_unlock(&smmu->stream_map_mutex);
1133 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1134 struct arm_smmu_master_cfg *cfg,
1135 struct iommu_fwspec *fwspec)
1137 struct arm_smmu_device *smmu = smmu_domain->smmu;
1138 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1139 u8 cbndx = smmu_domain->cfg.cbndx;
1140 enum arm_smmu_s2cr_type type;
1141 int i, idx;
1143 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1144 type = S2CR_TYPE_BYPASS;
1145 else
1146 type = S2CR_TYPE_TRANS;
1148 for_each_cfg_sme(cfg, fwspec, i, idx) {
1149 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1150 continue;
1152 s2cr[idx].type = type;
1153 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1154 s2cr[idx].cbndx = cbndx;
1155 arm_smmu_write_s2cr(smmu, idx);
1157 return 0;
1160 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1162 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1163 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1164 struct arm_smmu_master_cfg *cfg;
1165 struct arm_smmu_device *smmu;
1166 int ret;
1168 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1169 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1170 return -ENXIO;
1174 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1175 * domains between of_xlate() and add_device() - we have no way to cope
1176 * with that, so until ARM gets converted to rely on groups and default
1177 * domains, just say no (but more politely than by dereferencing NULL).
1178 * This should be at least a WARN_ON once that's sorted.
1180 cfg = dev_iommu_priv_get(dev);
1181 if (!cfg)
1182 return -ENODEV;
1184 smmu = cfg->smmu;
1186 ret = arm_smmu_rpm_get(smmu);
1187 if (ret < 0)
1188 return ret;
1190 /* Ensure that the domain is finalised */
1191 ret = arm_smmu_init_domain_context(domain, smmu);
1192 if (ret < 0)
1193 goto rpm_put;
1196 * Sanity check the domain. We don't support domains across
1197 * different SMMUs.
1199 if (smmu_domain->smmu != smmu) {
1200 dev_err(dev,
1201 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1202 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1203 ret = -EINVAL;
1204 goto rpm_put;
1207 /* Looks ok, so add the device to the domain */
1208 ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1211 * Setup an autosuspend delay to avoid bouncing runpm state.
1212 * Otherwise, if a driver for a suspended consumer device
1213 * unmaps buffers, it will runpm resume/suspend for each one.
1215 * For example, when used by a GPU device, when an application
1216 * or game exits, it can trigger unmapping 100s or 1000s of
1217 * buffers. With a runpm cycle for each buffer, that adds up
1218 * to 5-10sec worth of reprogramming the context bank, while
1219 * the system appears to be locked up to the user.
1221 pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1222 pm_runtime_use_autosuspend(smmu->dev);
1224 rpm_put:
1225 arm_smmu_rpm_put(smmu);
1226 return ret;
1229 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1230 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
1232 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1233 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1234 int ret;
1236 if (!ops)
1237 return -ENODEV;
1239 arm_smmu_rpm_get(smmu);
1240 ret = ops->map(ops, iova, paddr, size, prot);
1241 arm_smmu_rpm_put(smmu);
1243 return ret;
1246 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1247 size_t size, struct iommu_iotlb_gather *gather)
1249 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1250 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1251 size_t ret;
1253 if (!ops)
1254 return 0;
1256 arm_smmu_rpm_get(smmu);
1257 ret = ops->unmap(ops, iova, size, gather);
1258 arm_smmu_rpm_put(smmu);
1260 return ret;
1263 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1265 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1266 struct arm_smmu_device *smmu = smmu_domain->smmu;
1268 if (smmu_domain->flush_ops) {
1269 arm_smmu_rpm_get(smmu);
1270 smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1271 arm_smmu_rpm_put(smmu);
1275 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1276 struct iommu_iotlb_gather *gather)
1278 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1279 struct arm_smmu_device *smmu = smmu_domain->smmu;
1281 if (!smmu)
1282 return;
1284 arm_smmu_rpm_get(smmu);
1285 if (smmu->version == ARM_SMMU_V2 ||
1286 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1287 arm_smmu_tlb_sync_context(smmu_domain);
1288 else
1289 arm_smmu_tlb_sync_global(smmu);
1290 arm_smmu_rpm_put(smmu);
1293 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1294 dma_addr_t iova)
1296 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1297 struct arm_smmu_device *smmu = smmu_domain->smmu;
1298 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1299 struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1300 struct device *dev = smmu->dev;
1301 void __iomem *reg;
1302 u32 tmp;
1303 u64 phys;
1304 unsigned long va, flags;
1305 int ret, idx = cfg->cbndx;
1307 ret = arm_smmu_rpm_get(smmu);
1308 if (ret < 0)
1309 return 0;
1311 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1312 va = iova & ~0xfffUL;
1313 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1314 arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1315 else
1316 arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1318 reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1319 if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1320 5, 50)) {
1321 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1322 dev_err(dev,
1323 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1324 &iova);
1325 return ops->iova_to_phys(ops, iova);
1328 phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1329 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1330 if (phys & ARM_SMMU_CB_PAR_F) {
1331 dev_err(dev, "translation fault!\n");
1332 dev_err(dev, "PAR = 0x%llx\n", phys);
1333 return 0;
1336 arm_smmu_rpm_put(smmu);
1338 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1341 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1342 dma_addr_t iova)
1344 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1345 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1347 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1348 return iova;
1350 if (!ops)
1351 return 0;
1353 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1354 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1355 return arm_smmu_iova_to_phys_hard(domain, iova);
1357 return ops->iova_to_phys(ops, iova);
1360 static bool arm_smmu_capable(enum iommu_cap cap)
1362 switch (cap) {
1363 case IOMMU_CAP_CACHE_COHERENCY:
1365 * Return true here as the SMMU can always send out coherent
1366 * requests.
1368 return true;
1369 case IOMMU_CAP_NOEXEC:
1370 return true;
1371 default:
1372 return false;
1376 static
1377 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1379 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1380 fwnode);
1381 put_device(dev);
1382 return dev ? dev_get_drvdata(dev) : NULL;
1385 static int arm_smmu_add_device(struct device *dev)
1387 struct arm_smmu_device *smmu = NULL;
1388 struct arm_smmu_master_cfg *cfg;
1389 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1390 int i, ret;
1392 if (using_legacy_binding) {
1393 ret = arm_smmu_register_legacy_master(dev, &smmu);
1396 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1397 * will allocate/initialise a new one. Thus we need to update fwspec for
1398 * later use.
1400 fwspec = dev_iommu_fwspec_get(dev);
1401 if (ret)
1402 goto out_free;
1403 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1404 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1405 } else {
1406 return -ENODEV;
1409 ret = -EINVAL;
1410 for (i = 0; i < fwspec->num_ids; i++) {
1411 u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1412 u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1414 if (sid & ~smmu->streamid_mask) {
1415 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1416 sid, smmu->streamid_mask);
1417 goto out_free;
1419 if (mask & ~smmu->smr_mask_mask) {
1420 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1421 mask, smmu->smr_mask_mask);
1422 goto out_free;
1426 ret = -ENOMEM;
1427 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1428 GFP_KERNEL);
1429 if (!cfg)
1430 goto out_free;
1432 cfg->smmu = smmu;
1433 dev_iommu_priv_set(dev, cfg);
1434 while (i--)
1435 cfg->smendx[i] = INVALID_SMENDX;
1437 ret = arm_smmu_rpm_get(smmu);
1438 if (ret < 0)
1439 goto out_cfg_free;
1441 ret = arm_smmu_master_alloc_smes(dev);
1442 arm_smmu_rpm_put(smmu);
1444 if (ret)
1445 goto out_cfg_free;
1447 iommu_device_link(&smmu->iommu, dev);
1449 device_link_add(dev, smmu->dev,
1450 DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1452 return 0;
1454 out_cfg_free:
1455 kfree(cfg);
1456 out_free:
1457 iommu_fwspec_free(dev);
1458 return ret;
1461 static void arm_smmu_remove_device(struct device *dev)
1463 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1464 struct arm_smmu_master_cfg *cfg;
1465 struct arm_smmu_device *smmu;
1466 int ret;
1468 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1469 return;
1471 cfg = dev_iommu_priv_get(dev);
1472 smmu = cfg->smmu;
1474 ret = arm_smmu_rpm_get(smmu);
1475 if (ret < 0)
1476 return;
1478 iommu_device_unlink(&smmu->iommu, dev);
1479 arm_smmu_master_free_smes(cfg, fwspec);
1481 arm_smmu_rpm_put(smmu);
1483 dev_iommu_priv_set(dev, NULL);
1484 iommu_group_remove_device(dev);
1485 kfree(cfg);
1486 iommu_fwspec_free(dev);
1489 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1491 struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1492 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1493 struct arm_smmu_device *smmu = cfg->smmu;
1494 struct iommu_group *group = NULL;
1495 int i, idx;
1497 for_each_cfg_sme(cfg, fwspec, i, idx) {
1498 if (group && smmu->s2crs[idx].group &&
1499 group != smmu->s2crs[idx].group)
1500 return ERR_PTR(-EINVAL);
1502 group = smmu->s2crs[idx].group;
1505 if (group)
1506 return iommu_group_ref_get(group);
1508 if (dev_is_pci(dev))
1509 group = pci_device_group(dev);
1510 else if (dev_is_fsl_mc(dev))
1511 group = fsl_mc_device_group(dev);
1512 else
1513 group = generic_device_group(dev);
1515 return group;
1518 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1519 enum iommu_attr attr, void *data)
1521 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1523 switch(domain->type) {
1524 case IOMMU_DOMAIN_UNMANAGED:
1525 switch (attr) {
1526 case DOMAIN_ATTR_NESTING:
1527 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1528 return 0;
1529 default:
1530 return -ENODEV;
1532 break;
1533 case IOMMU_DOMAIN_DMA:
1534 switch (attr) {
1535 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1536 *(int *)data = smmu_domain->non_strict;
1537 return 0;
1538 default:
1539 return -ENODEV;
1541 break;
1542 default:
1543 return -EINVAL;
1547 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1548 enum iommu_attr attr, void *data)
1550 int ret = 0;
1551 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1553 mutex_lock(&smmu_domain->init_mutex);
1555 switch(domain->type) {
1556 case IOMMU_DOMAIN_UNMANAGED:
1557 switch (attr) {
1558 case DOMAIN_ATTR_NESTING:
1559 if (smmu_domain->smmu) {
1560 ret = -EPERM;
1561 goto out_unlock;
1564 if (*(int *)data)
1565 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1566 else
1567 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1568 break;
1569 default:
1570 ret = -ENODEV;
1572 break;
1573 case IOMMU_DOMAIN_DMA:
1574 switch (attr) {
1575 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
1576 smmu_domain->non_strict = *(int *)data;
1577 break;
1578 default:
1579 ret = -ENODEV;
1581 break;
1582 default:
1583 ret = -EINVAL;
1585 out_unlock:
1586 mutex_unlock(&smmu_domain->init_mutex);
1587 return ret;
1590 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1592 u32 mask, fwid = 0;
1594 if (args->args_count > 0)
1595 fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1597 if (args->args_count > 1)
1598 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1599 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1600 fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1602 return iommu_fwspec_add_ids(dev, &fwid, 1);
1605 static void arm_smmu_get_resv_regions(struct device *dev,
1606 struct list_head *head)
1608 struct iommu_resv_region *region;
1609 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1611 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1612 prot, IOMMU_RESV_SW_MSI);
1613 if (!region)
1614 return;
1616 list_add_tail(&region->list, head);
1618 iommu_dma_get_resv_regions(dev, head);
1621 static struct iommu_ops arm_smmu_ops = {
1622 .capable = arm_smmu_capable,
1623 .domain_alloc = arm_smmu_domain_alloc,
1624 .domain_free = arm_smmu_domain_free,
1625 .attach_dev = arm_smmu_attach_dev,
1626 .map = arm_smmu_map,
1627 .unmap = arm_smmu_unmap,
1628 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
1629 .iotlb_sync = arm_smmu_iotlb_sync,
1630 .iova_to_phys = arm_smmu_iova_to_phys,
1631 .add_device = arm_smmu_add_device,
1632 .remove_device = arm_smmu_remove_device,
1633 .device_group = arm_smmu_device_group,
1634 .domain_get_attr = arm_smmu_domain_get_attr,
1635 .domain_set_attr = arm_smmu_domain_set_attr,
1636 .of_xlate = arm_smmu_of_xlate,
1637 .get_resv_regions = arm_smmu_get_resv_regions,
1638 .put_resv_regions = generic_iommu_put_resv_regions,
1639 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1642 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1644 int i;
1645 u32 reg;
1647 /* clear global FSR */
1648 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1649 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1652 * Reset stream mapping groups: Initial values mark all SMRn as
1653 * invalid and all S2CRn as bypass unless overridden.
1655 for (i = 0; i < smmu->num_mapping_groups; ++i)
1656 arm_smmu_write_sme(smmu, i);
1658 /* Make sure all context banks are disabled and clear CB_FSR */
1659 for (i = 0; i < smmu->num_context_banks; ++i) {
1660 arm_smmu_write_context_bank(smmu, i);
1661 arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1664 /* Invalidate the TLB, just in case */
1665 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1666 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1668 reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1670 /* Enable fault reporting */
1671 reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1672 ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1674 /* Disable TLB broadcasting. */
1675 reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1677 /* Enable client access, handling unmatched streams as appropriate */
1678 reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1679 if (disable_bypass)
1680 reg |= ARM_SMMU_sCR0_USFCFG;
1681 else
1682 reg &= ~ARM_SMMU_sCR0_USFCFG;
1684 /* Disable forced broadcasting */
1685 reg &= ~ARM_SMMU_sCR0_FB;
1687 /* Don't upgrade barriers */
1688 reg &= ~(ARM_SMMU_sCR0_BSU);
1690 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1691 reg |= ARM_SMMU_sCR0_VMID16EN;
1693 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1694 reg |= ARM_SMMU_sCR0_EXIDENABLE;
1696 if (smmu->impl && smmu->impl->reset)
1697 smmu->impl->reset(smmu);
1699 /* Push the button */
1700 arm_smmu_tlb_sync_global(smmu);
1701 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
/*
 * Convert an address size field encoding into a width in bits.
 * Encodings 0..5 map to 32, 36, 40, 42, 44 and 48 bits; any other value
 * is treated like 5 and yields 48.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44, 48 };
	const int last = (int)(sizeof(bits) / sizeof(bits[0])) - 1;

	if (size < 0 || size > last)
		return bits[last];

	return bits[size];
}
1723 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1725 unsigned int size;
1726 u32 id;
1727 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1728 int i;
1730 dev_notice(smmu->dev, "probing hardware configuration...\n");
1731 dev_notice(smmu->dev, "SMMUv%d with:\n",
1732 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1734 /* ID0 */
1735 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1737 /* Restrict available stages based on module parameter */
1738 if (force_stage == 1)
1739 id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1740 else if (force_stage == 2)
1741 id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1743 if (id & ARM_SMMU_ID0_S1TS) {
1744 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1745 dev_notice(smmu->dev, "\tstage 1 translation\n");
1748 if (id & ARM_SMMU_ID0_S2TS) {
1749 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1750 dev_notice(smmu->dev, "\tstage 2 translation\n");
1753 if (id & ARM_SMMU_ID0_NTS) {
1754 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1755 dev_notice(smmu->dev, "\tnested translation\n");
1758 if (!(smmu->features &
1759 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1760 dev_err(smmu->dev, "\tno translation support!\n");
1761 return -ENODEV;
1764 if ((id & ARM_SMMU_ID0_S1TS) &&
1765 ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1766 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1767 dev_notice(smmu->dev, "\taddress translation ops\n");
1771 * In order for DMA API calls to work properly, we must defer to what
1772 * the FW says about coherency, regardless of what the hardware claims.
1773 * Fortunately, this also opens up a workaround for systems where the
1774 * ID register value has ended up configured incorrectly.
1776 cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1777 if (cttw_fw || cttw_reg)
1778 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1779 cttw_fw ? "" : "non-");
1780 if (cttw_fw != cttw_reg)
1781 dev_notice(smmu->dev,
1782 "\t(IDR0.CTTW overridden by FW configuration)\n");
1784 /* Max. number of entries we have for stream matching/indexing */
1785 if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1786 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1787 size = 1 << 16;
1788 } else {
1789 size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1791 smmu->streamid_mask = size - 1;
1792 if (id & ARM_SMMU_ID0_SMS) {
1793 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1794 size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1795 if (size == 0) {
1796 dev_err(smmu->dev,
1797 "stream-matching supported, but no SMRs present!\n");
1798 return -ENODEV;
1801 /* Zero-initialised to mark as invalid */
1802 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1803 GFP_KERNEL);
1804 if (!smmu->smrs)
1805 return -ENOMEM;
1807 dev_notice(smmu->dev,
1808 "\tstream matching with %u register groups", size);
1810 /* s2cr->type == 0 means translation, so initialise explicitly */
1811 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1812 GFP_KERNEL);
1813 if (!smmu->s2crs)
1814 return -ENOMEM;
1815 for (i = 0; i < size; i++)
1816 smmu->s2crs[i] = s2cr_init_val;
1818 smmu->num_mapping_groups = size;
1819 mutex_init(&smmu->stream_map_mutex);
1820 spin_lock_init(&smmu->global_sync_lock);
1822 if (smmu->version < ARM_SMMU_V2 ||
1823 !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1824 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1825 if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1826 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1829 /* ID1 */
1830 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1831 smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1833 /* Check for size mismatch of SMMU address space from mapped region */
1834 size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1835 if (smmu->numpage != 2 * size << smmu->pgshift)
1836 dev_warn(smmu->dev,
1837 "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1838 2 * size << smmu->pgshift, smmu->numpage);
1839 /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1840 smmu->numpage = size;
1842 smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1843 smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1844 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1845 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1846 return -ENODEV;
1848 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1849 smmu->num_context_banks, smmu->num_s2_context_banks);
1850 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1851 sizeof(*smmu->cbs), GFP_KERNEL);
1852 if (!smmu->cbs)
1853 return -ENOMEM;
1855 /* ID2 */
1856 id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1857 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1858 smmu->ipa_size = size;
1860 /* The output mask is also applied for bypass */
1861 size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1862 smmu->pa_size = size;
1864 if (id & ARM_SMMU_ID2_VMID16)
1865 smmu->features |= ARM_SMMU_FEAT_VMID16;
1868 * What the page table walker can address actually depends on which
1869 * descriptor format is in use, but since a) we don't know that yet,
1870 * and b) it can vary per context bank, this will have to do...
1872 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1873 dev_warn(smmu->dev,
1874 "failed to set DMA mask for table walker\n");
1876 if (smmu->version < ARM_SMMU_V2) {
1877 smmu->va_size = smmu->ipa_size;
1878 if (smmu->version == ARM_SMMU_V1_64K)
1879 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1880 } else {
1881 size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1882 smmu->va_size = arm_smmu_id_size_to_bits(size);
1883 if (id & ARM_SMMU_ID2_PTFS_4K)
1884 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1885 if (id & ARM_SMMU_ID2_PTFS_16K)
1886 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1887 if (id & ARM_SMMU_ID2_PTFS_64K)
1888 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1891 /* Now we've corralled the various formats, what'll it do? */
1892 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1893 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1894 if (smmu->features &
1895 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1896 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1897 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1898 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1899 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1900 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1902 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1903 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1904 else
1905 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1906 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1907 smmu->pgsize_bitmap);
1910 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1911 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1912 smmu->va_size, smmu->ipa_size);
1914 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1915 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1916 smmu->ipa_size, smmu->pa_size);
1918 if (smmu->impl && smmu->impl->cfg_probe)
1919 return smmu->impl->cfg_probe(smmu);
1921 return 0;
1924 struct arm_smmu_match_data {
1925 enum arm_smmu_arch_version version;
1926 enum arm_smmu_implementation model;
1929 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1930 static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
1932 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1933 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1934 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1935 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1936 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1937 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1939 static const struct of_device_id arm_smmu_of_match[] = {
1940 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1941 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1942 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1943 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1944 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1945 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1946 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1947 { },
1949 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1951 #ifdef CONFIG_ACPI
1952 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1954 int ret = 0;
1956 switch (model) {
1957 case ACPI_IORT_SMMU_V1:
1958 case ACPI_IORT_SMMU_CORELINK_MMU400:
1959 smmu->version = ARM_SMMU_V1;
1960 smmu->model = GENERIC_SMMU;
1961 break;
1962 case ACPI_IORT_SMMU_CORELINK_MMU401:
1963 smmu->version = ARM_SMMU_V1_64K;
1964 smmu->model = GENERIC_SMMU;
1965 break;
1966 case ACPI_IORT_SMMU_V2:
1967 smmu->version = ARM_SMMU_V2;
1968 smmu->model = GENERIC_SMMU;
1969 break;
1970 case ACPI_IORT_SMMU_CORELINK_MMU500:
1971 smmu->version = ARM_SMMU_V2;
1972 smmu->model = ARM_MMU500;
1973 break;
1974 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1975 smmu->version = ARM_SMMU_V2;
1976 smmu->model = CAVIUM_SMMUV2;
1977 break;
1978 default:
1979 ret = -ENODEV;
1982 return ret;
1985 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1986 struct arm_smmu_device *smmu)
1988 struct device *dev = smmu->dev;
1989 struct acpi_iort_node *node =
1990 *(struct acpi_iort_node **)dev_get_platdata(dev);
1991 struct acpi_iort_smmu *iort_smmu;
1992 int ret;
1994 /* Retrieve SMMU1/2 specific data */
1995 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1997 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1998 if (ret < 0)
1999 return ret;
2001 /* Ignore the configuration access interrupt */
2002 smmu->num_global_irqs = 1;
2004 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
2005 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2007 return 0;
2009 #else
2010 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2011 struct arm_smmu_device *smmu)
2013 return -ENODEV;
2015 #endif
2017 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2018 struct arm_smmu_device *smmu)
2020 const struct arm_smmu_match_data *data;
2021 struct device *dev = &pdev->dev;
2022 bool legacy_binding;
2024 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2025 &smmu->num_global_irqs)) {
2026 dev_err(dev, "missing #global-interrupts property\n");
2027 return -ENODEV;
2030 data = of_device_get_match_data(dev);
2031 smmu->version = data->version;
2032 smmu->model = data->model;
2034 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2035 if (legacy_binding && !using_generic_binding) {
2036 if (!using_legacy_binding) {
2037 pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2038 IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2040 using_legacy_binding = true;
2041 } else if (!legacy_binding && !using_legacy_binding) {
2042 using_generic_binding = true;
2043 } else {
2044 dev_err(dev, "not probing due to mismatched DT properties\n");
2045 return -ENODEV;
2048 if (of_dma_is_coherent(dev->of_node))
2049 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2051 return 0;
2054 static int arm_smmu_bus_init(struct iommu_ops *ops)
2056 int err;
2058 /* Oh, for a proper bus abstraction */
2059 if (!iommu_present(&platform_bus_type)) {
2060 err = bus_set_iommu(&platform_bus_type, ops);
2061 if (err)
2062 return err;
2064 #ifdef CONFIG_ARM_AMBA
2065 if (!iommu_present(&amba_bustype)) {
2066 err = bus_set_iommu(&amba_bustype, ops);
2067 if (err)
2068 goto err_reset_platform_ops;
2070 #endif
2071 #ifdef CONFIG_PCI
2072 if (!iommu_present(&pci_bus_type)) {
2073 err = bus_set_iommu(&pci_bus_type, ops);
2074 if (err)
2075 goto err_reset_amba_ops;
2077 #endif
2078 #ifdef CONFIG_FSL_MC_BUS
2079 if (!iommu_present(&fsl_mc_bus_type)) {
2080 err = bus_set_iommu(&fsl_mc_bus_type, ops);
2081 if (err)
2082 goto err_reset_pci_ops;
2084 #endif
2085 return 0;
2087 err_reset_pci_ops: __maybe_unused;
2088 #ifdef CONFIG_PCI
2089 bus_set_iommu(&pci_bus_type, NULL);
2090 #endif
2091 err_reset_amba_ops: __maybe_unused;
2092 #ifdef CONFIG_ARM_AMBA
2093 bus_set_iommu(&amba_bustype, NULL);
2094 #endif
2095 err_reset_platform_ops: __maybe_unused;
2096 bus_set_iommu(&platform_bus_type, NULL);
2097 return err;
2100 static int arm_smmu_device_probe(struct platform_device *pdev)
2102 struct resource *res;
2103 resource_size_t ioaddr;
2104 struct arm_smmu_device *smmu;
2105 struct device *dev = &pdev->dev;
2106 int num_irqs, i, err;
2108 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2109 if (!smmu) {
2110 dev_err(dev, "failed to allocate arm_smmu_device\n");
2111 return -ENOMEM;
2113 smmu->dev = dev;
2115 if (dev->of_node)
2116 err = arm_smmu_device_dt_probe(pdev, smmu);
2117 else
2118 err = arm_smmu_device_acpi_probe(pdev, smmu);
2120 if (err)
2121 return err;
2123 smmu = arm_smmu_impl_init(smmu);
2124 if (IS_ERR(smmu))
2125 return PTR_ERR(smmu);
2127 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2128 ioaddr = res->start;
2129 smmu->base = devm_ioremap_resource(dev, res);
2130 if (IS_ERR(smmu->base))
2131 return PTR_ERR(smmu->base);
2133 * The resource size should effectively match the value of SMMU_TOP;
2134 * stash that temporarily until we know PAGESIZE to validate it with.
2136 smmu->numpage = resource_size(res);
2138 num_irqs = 0;
2139 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2140 num_irqs++;
2141 if (num_irqs > smmu->num_global_irqs)
2142 smmu->num_context_irqs++;
2145 if (!smmu->num_context_irqs) {
2146 dev_err(dev, "found %d interrupts but expected at least %d\n",
2147 num_irqs, smmu->num_global_irqs + 1);
2148 return -ENODEV;
2151 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2152 GFP_KERNEL);
2153 if (!smmu->irqs) {
2154 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2155 return -ENOMEM;
2158 for (i = 0; i < num_irqs; ++i) {
2159 int irq = platform_get_irq(pdev, i);
2161 if (irq < 0)
2162 return -ENODEV;
2163 smmu->irqs[i] = irq;
2166 err = devm_clk_bulk_get_all(dev, &smmu->clks);
2167 if (err < 0) {
2168 dev_err(dev, "failed to get clocks %d\n", err);
2169 return err;
2171 smmu->num_clks = err;
2173 err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2174 if (err)
2175 return err;
2177 err = arm_smmu_device_cfg_probe(smmu);
2178 if (err)
2179 return err;
2181 if (smmu->version == ARM_SMMU_V2) {
2182 if (smmu->num_context_banks > smmu->num_context_irqs) {
2183 dev_err(dev,
2184 "found only %d context irq(s) but %d required\n",
2185 smmu->num_context_irqs, smmu->num_context_banks);
2186 return -ENODEV;
2189 /* Ignore superfluous interrupts */
2190 smmu->num_context_irqs = smmu->num_context_banks;
2193 for (i = 0; i < smmu->num_global_irqs; ++i) {
2194 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2195 arm_smmu_global_fault,
2196 IRQF_SHARED,
2197 "arm-smmu global fault",
2198 smmu);
2199 if (err) {
2200 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2201 i, smmu->irqs[i]);
2202 return err;
2206 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2207 "smmu.%pa", &ioaddr);
2208 if (err) {
2209 dev_err(dev, "Failed to register iommu in sysfs\n");
2210 return err;
2213 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2214 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2216 err = iommu_device_register(&smmu->iommu);
2217 if (err) {
2218 dev_err(dev, "Failed to register iommu\n");
2219 return err;
2222 platform_set_drvdata(pdev, smmu);
2223 arm_smmu_device_reset(smmu);
2224 arm_smmu_test_smr_masks(smmu);
2227 * We want to avoid touching dev->power.lock in fastpaths unless
2228 * it's really going to do something useful - pm_runtime_enabled()
2229 * can serve as an ideal proxy for that decision. So, conditionally
2230 * enable pm_runtime.
2232 if (dev->pm_domain) {
2233 pm_runtime_set_active(dev);
2234 pm_runtime_enable(dev);
2238 * For ACPI and generic DT bindings, an SMMU will be probed before
2239 * any device which might need it, so we want the bus ops in place
2240 * ready to handle default domain setup as soon as any SMMU exists.
2242 if (!using_legacy_binding)
2243 return arm_smmu_bus_init(&arm_smmu_ops);
2245 return 0;
2248 static int arm_smmu_device_remove(struct platform_device *pdev)
2250 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2252 if (!smmu)
2253 return -ENODEV;
2255 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2256 dev_err(&pdev->dev, "removing device with active domains!\n");
2258 arm_smmu_bus_init(NULL);
2259 iommu_device_unregister(&smmu->iommu);
2260 iommu_device_sysfs_remove(&smmu->iommu);
2262 arm_smmu_rpm_get(smmu);
2263 /* Turn the thing off */
2264 arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2265 arm_smmu_rpm_put(smmu);
2267 if (pm_runtime_enabled(smmu->dev))
2268 pm_runtime_force_suspend(smmu->dev);
2269 else
2270 clk_bulk_disable(smmu->num_clks, smmu->clks);
2272 clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2273 return 0;
/*
 * Shutdown hook for the platform driver: performs the same teardown as
 * arm_smmu_device_remove().
 */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	/* A void callback has nowhere to report the status, so it is dropped. */
	(void)arm_smmu_device_remove(pdev);
}
2281 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2283 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2284 int ret;
2286 ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2287 if (ret)
2288 return ret;
2290 arm_smmu_device_reset(smmu);
2292 return 0;
2295 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2297 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2299 clk_bulk_disable(smmu->num_clks, smmu->clks);
2301 return 0;
2304 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2306 if (pm_runtime_suspended(dev))
2307 return 0;
2309 return arm_smmu_runtime_resume(dev);
2312 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2314 if (pm_runtime_suspended(dev))
2315 return 0;
2317 return arm_smmu_runtime_suspend(dev);
2320 static const struct dev_pm_ops arm_smmu_pm_ops = {
2321 SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2322 SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2323 arm_smmu_runtime_resume, NULL)
2326 static struct platform_driver arm_smmu_driver = {
2327 .driver = {
2328 .name = "arm-smmu",
2329 .of_match_table = arm_smmu_of_match,
2330 .pm = &arm_smmu_pm_ops,
2331 .suppress_bind_attrs = true,
2333 .probe = arm_smmu_device_probe,
2334 .remove = arm_smmu_device_remove,
2335 .shutdown = arm_smmu_device_shutdown,
2337 module_platform_driver(arm_smmu_driver);
2339 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2340 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2341 MODULE_ALIAS("platform:arm-smmu");
2342 MODULE_LICENSE("GPL v2");