Linux 4.19.133
drivers/iommu/arm-smmu.c
1 /*
2 * IOMMU API for ARM architected SMMU implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 * Copyright (C) 2013 ARM Limited
19 * Author: Will Deacon <will.deacon@arm.com>
21 * This driver currently supports:
22 * - SMMUv1 and v2 implementations
23 * - Stream-matching and stream-indexing
24 * - v7/v8 long-descriptor format
25 * - Non-secure access to the SMMU
26 * - Context fault reporting
27 * - Extended Stream ID (16 bit)
30 #define pr_fmt(fmt) "arm-smmu: " fmt
32 #include <linux/acpi.h>
33 #include <linux/acpi_iort.h>
34 #include <linux/atomic.h>
35 #include <linux/delay.h>
36 #include <linux/dma-iommu.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/err.h>
39 #include <linux/interrupt.h>
40 #include <linux/io.h>
41 #include <linux/io-64-nonatomic-hi-lo.h>
42 #include <linux/iommu.h>
43 #include <linux/iopoll.h>
44 #include <linux/module.h>
45 #include <linux/of.h>
46 #include <linux/of_address.h>
47 #include <linux/of_device.h>
48 #include <linux/of_iommu.h>
49 #include <linux/pci.h>
50 #include <linux/platform_device.h>
51 #include <linux/slab.h>
52 #include <linux/spinlock.h>
54 #include <linux/amba/bus.h>
56 #include "io-pgtable.h"
57 #include "arm-smmu-regs.h"
60 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
61 * global register space are still, in fact, using a hypervisor to mediate it
62 * by trapping and emulating register accesses. Sadly, some deployed versions
63 * of said trapping code have bugs wherein they go horribly wrong for stores
64 * using r31 (i.e. XZR/WZR) as the source register.
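* To work around that, QCOM_DUMMY_VAL below is written instead of a literal
* zero to registers that merely trigger an action (TLB invalidates and
* syncs) and whose written value is otherwise ignored.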
66 #define QCOM_DUMMY_VAL -1
68 #define ARM_MMU500_ACTLR_CPRE (1 << 1)
70 #define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
71 #define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
72 #define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
74 #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
75 #define TLB_SPIN_COUNT 10
77 /* Maximum number of context banks per SMMU */
78 #define ARM_SMMU_MAX_CBS 128
80 /* SMMU global address space */
81 #define ARM_SMMU_GR0(smmu) ((smmu)->base)
82 #define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
85 * SMMU global address space with conditional offset to access secure
86 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
87 * nsGFSYNR0: 0x450)
89 #define ARM_SMMU_GR0_NS(smmu) \
90 ((smmu)->base + \
91 ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
92 ? 0x400 : 0))
95 * Some 64-bit registers only make sense to write atomically, but in such
96 * cases all the data relevant to AArch32 formats lies within the lower word,
97 * therefore this actually makes more sense than it might first appear.
99 #ifdef CONFIG_64BIT
100 #define smmu_write_atomic_lq writeq_relaxed
101 #else
102 #define smmu_write_atomic_lq writel_relaxed
103 #endif
105 /* Translation context bank */
106 #define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
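/* Fixed IOVA window advertised to the IOMMU core as a software-managed MSI region */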
108 #define MSI_IOVA_BASE 0x8000000
109 #define MSI_IOVA_LENGTH 0x100000
111 static int force_stage;
112 module_param(force_stage, int, S_IRUGO);
113 MODULE_PARM_DESC(force_stage,
114 "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
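/* e.g. booting with arm-smmu.force_stage=2 restricts all domains to stage-2 translation */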
115 static bool disable_bypass;
116 module_param(disable_bypass, bool, S_IRUGO);
117 MODULE_PARM_DESC(disable_bypass,
118 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
120 enum arm_smmu_arch_version {
121 ARM_SMMU_V1,
122 ARM_SMMU_V1_64K,
123 ARM_SMMU_V2,
126 enum arm_smmu_implementation {
127 GENERIC_SMMU,
128 ARM_MMU500,
129 CAVIUM_SMMUV2,
130 QCOM_SMMUV2,
133 struct arm_smmu_s2cr {
134 struct iommu_group *group;
135 int count;
136 enum arm_smmu_s2cr_type type;
137 enum arm_smmu_s2cr_privcfg privcfg;
138 u8 cbndx;
141 #define s2cr_init_val (struct arm_smmu_s2cr){ \
142 .type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
145 struct arm_smmu_smr {
146 u16 mask;
147 u16 id;
148 bool valid;
151 struct arm_smmu_cb {
152 u64 ttbr[2];
153 u32 tcr[2];
154 u32 mair[2];
155 struct arm_smmu_cfg *cfg;
158 struct arm_smmu_master_cfg {
159 struct arm_smmu_device *smmu;
160 s16 smendx[];
162 #define INVALID_SMENDX -1
163 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
164 #define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
165 #define fwspec_smendx(fw, i) \
166 (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
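/*
* Iterate over a master's stream map entries: i indexes the fwspec stream IDs
* and idx is set to the corresponding SME index (which may be INVALID_SMENDX
* if no entry has been allocated yet).
*/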
167 #define for_each_cfg_sme(fw, i, idx) \
168 for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
170 struct arm_smmu_device {
171 struct device *dev;
173 void __iomem *base;
174 void __iomem *cb_base;
175 unsigned long pgshift;
177 #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
178 #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
179 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
180 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
181 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
182 #define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
183 #define ARM_SMMU_FEAT_VMID16 (1 << 6)
184 #define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
185 #define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
186 #define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
187 #define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
188 #define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
189 #define ARM_SMMU_FEAT_EXIDS (1 << 12)
190 u32 features;
192 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
193 u32 options;
194 enum arm_smmu_arch_version version;
195 enum arm_smmu_implementation model;
197 u32 num_context_banks;
198 u32 num_s2_context_banks;
199 DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
200 struct arm_smmu_cb *cbs;
201 atomic_t irptndx;
203 u32 num_mapping_groups;
204 u16 streamid_mask;
205 u16 smr_mask_mask;
206 struct arm_smmu_smr *smrs;
207 struct arm_smmu_s2cr *s2crs;
208 struct mutex stream_map_mutex;
210 unsigned long va_size;
211 unsigned long ipa_size;
212 unsigned long pa_size;
213 unsigned long pgsize_bitmap;
215 u32 num_global_irqs;
216 u32 num_context_irqs;
217 unsigned int *irqs;
219 u32 cavium_id_base; /* Specific to Cavium */
221 spinlock_t global_sync_lock;
223 /* IOMMU core code handle */
224 struct iommu_device iommu;
227 enum arm_smmu_context_fmt {
228 ARM_SMMU_CTX_FMT_NONE,
229 ARM_SMMU_CTX_FMT_AARCH64,
230 ARM_SMMU_CTX_FMT_AARCH32_L,
231 ARM_SMMU_CTX_FMT_AARCH32_S,
234 struct arm_smmu_cfg {
235 u8 cbndx;
236 u8 irptndx;
237 union {
238 u16 asid;
239 u16 vmid;
241 u32 cbar;
242 enum arm_smmu_context_fmt fmt;
244 #define INVALID_IRPTNDX 0xff
246 enum arm_smmu_domain_stage {
247 ARM_SMMU_DOMAIN_S1 = 0,
248 ARM_SMMU_DOMAIN_S2,
249 ARM_SMMU_DOMAIN_NESTED,
250 ARM_SMMU_DOMAIN_BYPASS,
253 struct arm_smmu_domain {
254 struct arm_smmu_device *smmu;
255 struct io_pgtable_ops *pgtbl_ops;
256 const struct iommu_gather_ops *tlb_ops;
257 struct arm_smmu_cfg cfg;
258 enum arm_smmu_domain_stage stage;
259 struct mutex init_mutex; /* Protects smmu pointer */
260 spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
261 struct iommu_domain domain;
264 struct arm_smmu_option_prop {
265 u32 opt;
266 const char *prop;
269 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
271 static bool using_legacy_binding, using_generic_binding;
273 static struct arm_smmu_option_prop arm_smmu_options[] = {
274 { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
275 { 0, NULL},
278 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
280 return container_of(dom, struct arm_smmu_domain, domain);
283 static void parse_driver_options(struct arm_smmu_device *smmu)
285 int i = 0;
287 do {
288 if (of_property_read_bool(smmu->dev->of_node,
289 arm_smmu_options[i].prop)) {
290 smmu->options |= arm_smmu_options[i].opt;
291 dev_notice(smmu->dev, "option %s\n",
292 arm_smmu_options[i].prop);
294 } while (arm_smmu_options[++i].opt);
297 static struct device_node *dev_get_dev_node(struct device *dev)
299 if (dev_is_pci(dev)) {
300 struct pci_bus *bus = to_pci_dev(dev)->bus;
302 while (!pci_is_root_bus(bus))
303 bus = bus->parent;
304 return of_node_get(bus->bridge->parent->of_node);
307 return of_node_get(dev->of_node);
310 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
312 *((__be32 *)data) = cpu_to_be32(alias);
313 return 0; /* Continue walking */
316 static int __find_legacy_master_phandle(struct device *dev, void *data)
318 struct of_phandle_iterator *it = *(void **)data;
319 struct device_node *np = it->node;
320 int err;
322 of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
323 "#stream-id-cells", 0)
324 if (it->node == np) {
325 *(void **)data = dev;
326 return 1;
328 it->node = np;
329 return err == -ENOENT ? 0 : err;
332 static struct platform_driver arm_smmu_driver;
333 static struct iommu_ops arm_smmu_ops;
335 static int arm_smmu_register_legacy_master(struct device *dev,
336 struct arm_smmu_device **smmu)
338 struct device *smmu_dev;
339 struct device_node *np;
340 struct of_phandle_iterator it;
341 void *data = &it;
342 u32 *sids;
343 __be32 pci_sid;
344 int err;
346 np = dev_get_dev_node(dev);
347 if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
348 of_node_put(np);
349 return -ENODEV;
352 it.node = np;
353 err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
354 __find_legacy_master_phandle);
355 smmu_dev = data;
356 of_node_put(np);
357 if (err == 0)
358 return -ENODEV;
359 if (err < 0)
360 return err;
362 if (dev_is_pci(dev)) {
363 /* "mmu-masters" assumes Stream ID == Requester ID */
364 pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
365 &pci_sid);
366 it.cur = &pci_sid;
367 it.cur_count = 1;
370 err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
371 &arm_smmu_ops);
372 if (err)
373 return err;
375 sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
376 if (!sids)
377 return -ENOMEM;
379 *smmu = dev_get_drvdata(smmu_dev);
380 of_phandle_iterator_args(&it, sids, it.cur_count);
381 err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
382 kfree(sids);
383 return err;
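/*
* Allocate a free index from @map without holding a lock: rescan with
* find_next_zero_bit() until test_and_set_bit() wins any race.
*/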
386 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
388 int idx;
390 do {
391 idx = find_next_zero_bit(map, end, start);
392 if (idx == end)
393 return -ENOSPC;
394 } while (test_and_set_bit(idx, map));
396 return idx;
399 static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
401 clear_bit(idx, map);
404 /* Wait for any pending TLB invalidations to complete */
405 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
406 void __iomem *sync, void __iomem *status)
408 unsigned int spin_cnt, delay;
410 writel_relaxed(QCOM_DUMMY_VAL, sync);
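/*
* Busy-wait in short bursts of TLB_SPIN_COUNT reads, backing off between
* bursts with an exponentially growing udelay() bounded by TLB_LOOP_TIMEOUT.
*/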
411 for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
412 for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
413 if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
414 return;
415 cpu_relax();
417 udelay(delay);
419 dev_err_ratelimited(smmu->dev,
420 "TLB sync timed out -- SMMU may be deadlocked\n");
423 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
425 void __iomem *base = ARM_SMMU_GR0(smmu);
426 unsigned long flags;
428 spin_lock_irqsave(&smmu->global_sync_lock, flags);
429 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
430 base + ARM_SMMU_GR0_sTLBGSTATUS);
431 spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
434 static void arm_smmu_tlb_sync_context(void *cookie)
436 struct arm_smmu_domain *smmu_domain = cookie;
437 struct arm_smmu_device *smmu = smmu_domain->smmu;
438 void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
439 unsigned long flags;
441 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
442 __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
443 base + ARM_SMMU_CB_TLBSTATUS);
444 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
447 static void arm_smmu_tlb_sync_vmid(void *cookie)
449 struct arm_smmu_domain *smmu_domain = cookie;
451 arm_smmu_tlb_sync_global(smmu_domain->smmu);
454 static void arm_smmu_tlb_inv_context_s1(void *cookie)
456 struct arm_smmu_domain *smmu_domain = cookie;
457 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
458 void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
460 writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
461 arm_smmu_tlb_sync_context(cookie);
464 static void arm_smmu_tlb_inv_context_s2(void *cookie)
466 struct arm_smmu_domain *smmu_domain = cookie;
467 struct arm_smmu_device *smmu = smmu_domain->smmu;
468 void __iomem *base = ARM_SMMU_GR0(smmu);
470 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
471 arm_smmu_tlb_sync_global(smmu);
474 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
475 size_t granule, bool leaf, void *cookie)
477 struct arm_smmu_domain *smmu_domain = cookie;
478 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
479 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
480 void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
482 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
483 wmb();
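/*
* Per-page invalidation: stage 1 targets TLBIVA(L), keyed by ASID; stage 2
* targets TLBIIPAS2(L), which operates on the context bank's current VMID.
*/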
485 if (stage1) {
486 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
488 if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
489 iova &= ~0xfffUL; /* mask off the page-offset bits */
490 iova |= cfg->asid;
491 do {
492 writel_relaxed(iova, reg);
493 iova += granule;
494 } while (size -= granule);
495 } else {
496 iova >>= 12;
497 iova |= (u64)cfg->asid << 48;
498 do {
499 writeq_relaxed(iova, reg);
500 iova += granule >> 12;
501 } while (size -= granule);
503 } else {
504 reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
505 ARM_SMMU_CB_S2_TLBIIPAS2;
506 iova >>= 12;
507 do {
508 smmu_write_atomic_lq(iova, reg);
509 iova += granule >> 12;
510 } while (size -= granule);
515 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
516 * almost negligible, but the benefit of getting the first one in as far ahead
517 * of the sync as possible is significant, hence we don't just make this a
518 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
520 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
521 size_t granule, bool leaf, void *cookie)
523 struct arm_smmu_domain *smmu_domain = cookie;
524 void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
526 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
527 wmb();
529 writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
532 static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
533 .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
534 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
535 .tlb_sync = arm_smmu_tlb_sync_context,
538 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
539 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
540 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
541 .tlb_sync = arm_smmu_tlb_sync_context,
544 static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
545 .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
546 .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
547 .tlb_sync = arm_smmu_tlb_sync_vmid,
550 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
552 u32 fsr, fsynr;
553 unsigned long iova;
554 struct iommu_domain *domain = dev;
555 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
556 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
557 struct arm_smmu_device *smmu = smmu_domain->smmu;
558 void __iomem *cb_base;
560 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
561 fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
563 if (!(fsr & FSR_FAULT))
564 return IRQ_NONE;
566 fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
567 iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
569 dev_err_ratelimited(smmu->dev,
570 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
571 fsr, iova, fsynr, cfg->cbndx);
573 writel(fsr, cb_base + ARM_SMMU_CB_FSR);
574 return IRQ_HANDLED;
577 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
579 u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
580 struct arm_smmu_device *smmu = dev;
581 void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
583 gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
584 gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
585 gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
586 gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
588 if (!gfsr)
589 return IRQ_NONE;
591 dev_err_ratelimited(smmu->dev,
592 "Unexpected global fault, this could be serious\n");
593 dev_err_ratelimited(smmu->dev,
594 "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
595 gfsr, gfsynr0, gfsynr1, gfsynr2);
597 writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
598 return IRQ_HANDLED;
601 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
602 struct io_pgtable_cfg *pgtbl_cfg)
604 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
605 struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
606 bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
608 cb->cfg = cfg;
610 /* TTBCR */
611 if (stage1) {
612 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
613 cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
614 } else {
615 cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
616 cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
617 cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
618 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
619 cb->tcr[1] |= TTBCR2_AS;
621 } else {
622 cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
625 /* TTBRs */
626 if (stage1) {
627 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
628 cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
629 cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
630 } else {
631 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
632 cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
633 cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
634 cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
636 } else {
637 cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
640 /* MAIRs (stage-1 only) */
641 if (stage1) {
642 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
643 cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
644 cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
645 } else {
646 cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
647 cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
652 static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
654 u32 reg;
655 bool stage1;
656 struct arm_smmu_cb *cb = &smmu->cbs[idx];
657 struct arm_smmu_cfg *cfg = cb->cfg;
658 void __iomem *cb_base, *gr1_base;
660 cb_base = ARM_SMMU_CB(smmu, idx);
662 /* Unassigned context banks only need disabling */
663 if (!cfg) {
664 writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
665 return;
668 gr1_base = ARM_SMMU_GR1(smmu);
669 stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
671 /* CBA2R */
672 if (smmu->version > ARM_SMMU_V1) {
673 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
674 reg = CBA2R_RW64_64BIT;
675 else
676 reg = CBA2R_RW64_32BIT;
677 /* 16-bit VMIDs live in CBA2R */
678 if (smmu->features & ARM_SMMU_FEAT_VMID16)
679 reg |= cfg->vmid << CBA2R_VMID_SHIFT;
681 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
684 /* CBAR */
685 reg = cfg->cbar;
686 if (smmu->version < ARM_SMMU_V2)
687 reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
690 * Use the weakest shareability/memory types, so they are
691 * overridden by the ttbcr/pte.
693 if (stage1) {
694 reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
695 (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
696 } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
697 /* 8-bit VMIDs live in CBAR */
698 reg |= cfg->vmid << CBAR_VMID_SHIFT;
700 writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
703 * TTBCR
704 * We must write this before the TTBRs, since it determines the
705 * access behaviour of some fields (in particular, ASID[15:8]).
707 if (stage1 && smmu->version > ARM_SMMU_V1)
708 writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
709 writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
711 /* TTBRs */
712 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
713 writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
714 writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
715 writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
716 } else {
717 writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
718 if (stage1)
719 writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
722 /* MAIRs (stage-1 only) */
723 if (stage1) {
724 writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
725 writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
728 /* SCTLR */
729 reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
730 if (stage1)
731 reg |= SCTLR_S1_ASIDPNE;
732 if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
733 reg |= SCTLR_E;
735 writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
738 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
739 struct arm_smmu_device *smmu)
741 int irq, start, ret = 0;
742 unsigned long ias, oas;
743 struct io_pgtable_ops *pgtbl_ops;
744 struct io_pgtable_cfg pgtbl_cfg;
745 enum io_pgtable_fmt fmt;
746 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
747 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
749 mutex_lock(&smmu_domain->init_mutex);
750 if (smmu_domain->smmu)
751 goto out_unlock;
753 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
754 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
755 smmu_domain->smmu = smmu;
756 goto out_unlock;
760 * Mapping the requested stage onto what we support is surprisingly
761 * complicated, mainly because the spec allows S1+S2 SMMUs without
762 * support for nested translation. That means we end up with the
763 * following table:
765 * Requested Supported Actual
766 * S1 N S1
767 * S1 S1+S2 S1
768 * S1 S2 S2
769 * S1 S1 S1
770 * N N N
771 * N S1+S2 S2
772 * N S2 S2
773 * N S1 S1
775 * Note that you can't actually request stage-2 mappings.
777 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
778 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
779 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
780 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
783 * Choosing a suitable context format is even more fiddly. Until we
784 * grow some way for the caller to express a preference, and/or move
785 * the decision into the io-pgtable code where it arguably belongs,
786 * just aim for the closest thing to the rest of the system, and hope
787 * that the hardware isn't esoteric enough that we can't assume AArch64
788 * support to be a superset of AArch32 support...
790 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
791 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
792 if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
793 !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
794 (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
795 (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
796 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
797 if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
798 (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
799 ARM_SMMU_FEAT_FMT_AARCH64_16K |
800 ARM_SMMU_FEAT_FMT_AARCH64_4K)))
801 cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
803 if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
804 ret = -EINVAL;
805 goto out_unlock;
808 switch (smmu_domain->stage) {
809 case ARM_SMMU_DOMAIN_S1:
810 cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
811 start = smmu->num_s2_context_banks;
812 ias = smmu->va_size;
813 oas = smmu->ipa_size;
814 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
815 fmt = ARM_64_LPAE_S1;
816 } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
817 fmt = ARM_32_LPAE_S1;
818 ias = min(ias, 32UL);
819 oas = min(oas, 40UL);
820 } else {
821 fmt = ARM_V7S;
822 ias = min(ias, 32UL);
823 oas = min(oas, 32UL);
825 smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
826 break;
827 case ARM_SMMU_DOMAIN_NESTED:
829 * We will likely want to change this if/when KVM gets
830 * involved.
832 case ARM_SMMU_DOMAIN_S2:
833 cfg->cbar = CBAR_TYPE_S2_TRANS;
834 start = 0;
835 ias = smmu->ipa_size;
836 oas = smmu->pa_size;
837 if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
838 fmt = ARM_64_LPAE_S2;
839 } else {
840 fmt = ARM_32_LPAE_S2;
841 ias = min(ias, 40UL);
842 oas = min(oas, 40UL);
844 if (smmu->version == ARM_SMMU_V2)
845 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
846 else
847 smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
848 break;
849 default:
850 ret = -EINVAL;
851 goto out_unlock;
853 ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
854 smmu->num_context_banks);
855 if (ret < 0)
856 goto out_unlock;
858 cfg->cbndx = ret;
859 if (smmu->version < ARM_SMMU_V2) {
860 cfg->irptndx = atomic_inc_return(&smmu->irptndx);
861 cfg->irptndx %= smmu->num_context_irqs;
862 } else {
863 cfg->irptndx = cfg->cbndx;
866 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
867 cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
868 else
869 cfg->asid = cfg->cbndx + smmu->cavium_id_base;
871 pgtbl_cfg = (struct io_pgtable_cfg) {
872 .pgsize_bitmap = smmu->pgsize_bitmap,
873 .ias = ias,
874 .oas = oas,
875 .tlb = smmu_domain->tlb_ops,
876 .iommu_dev = smmu->dev,
879 if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
880 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
882 smmu_domain->smmu = smmu;
883 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
884 if (!pgtbl_ops) {
885 ret = -ENOMEM;
886 goto out_clear_smmu;
889 /* Update the domain's page sizes to reflect the page table format */
890 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
891 domain->geometry.aperture_end = (1UL << ias) - 1;
892 domain->geometry.force_aperture = true;
894 /* Initialise the context bank with our page table cfg */
895 arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
896 arm_smmu_write_context_bank(smmu, cfg->cbndx);
899 * Request context fault interrupt. Do this last to avoid the
900 * handler seeing a half-initialised domain state.
902 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
903 ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
904 IRQF_SHARED, "arm-smmu-context-fault", domain);
905 if (ret < 0) {
906 dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
907 cfg->irptndx, irq);
908 cfg->irptndx = INVALID_IRPTNDX;
911 mutex_unlock(&smmu_domain->init_mutex);
913 /* Publish page table ops for map/unmap */
914 smmu_domain->pgtbl_ops = pgtbl_ops;
915 return 0;
917 out_clear_smmu:
918 smmu_domain->smmu = NULL;
919 out_unlock:
920 mutex_unlock(&smmu_domain->init_mutex);
921 return ret;
924 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
926 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
927 struct arm_smmu_device *smmu = smmu_domain->smmu;
928 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
929 int irq;
931 if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
932 return;
935 * Disable the context bank and free the page tables before freeing
936 * it.
938 smmu->cbs[cfg->cbndx].cfg = NULL;
939 arm_smmu_write_context_bank(smmu, cfg->cbndx);
941 if (cfg->irptndx != INVALID_IRPTNDX) {
942 irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
943 devm_free_irq(smmu->dev, irq, domain);
946 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
947 __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
950 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
952 struct arm_smmu_domain *smmu_domain;
954 if (type != IOMMU_DOMAIN_UNMANAGED &&
955 type != IOMMU_DOMAIN_DMA &&
956 type != IOMMU_DOMAIN_IDENTITY)
957 return NULL;
959 * Allocate the domain and initialise some of its data structures.
960 * We can't really do anything meaningful until we've added a
961 * master.
963 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
964 if (!smmu_domain)
965 return NULL;
967 if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
968 iommu_get_dma_cookie(&smmu_domain->domain))) {
969 kfree(smmu_domain);
970 return NULL;
973 mutex_init(&smmu_domain->init_mutex);
974 spin_lock_init(&smmu_domain->cb_lock);
976 return &smmu_domain->domain;
979 static void arm_smmu_domain_free(struct iommu_domain *domain)
981 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
984 * Free the domain resources. We assume that all devices have
985 * already been detached.
987 iommu_put_dma_cookie(domain);
988 arm_smmu_destroy_domain_context(domain);
989 kfree(smmu_domain);
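/*
* Stream mapping registers: each SMR matches incoming stream IDs against an
* id/mask pair, and the S2CR at the same index determines what happens to
* matching transactions (translate via a context bank, bypass, or fault).
*/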
992 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
994 struct arm_smmu_smr *smr = smmu->smrs + idx;
995 u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
997 if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
998 reg |= SMR_VALID;
999 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
1002 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
1004 struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
1005 u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
1006 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
1007 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
1009 if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
1010 smmu->smrs[idx].valid)
1011 reg |= S2CR_EXIDVALID;
1012 writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
1015 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
1017 arm_smmu_write_s2cr(smmu, idx);
1018 if (smmu->smrs)
1019 arm_smmu_write_smr(smmu, idx);
1023 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
1024 * should be called after sCR0 is written.
1026 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
1028 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1029 u32 smr;
1031 if (!smmu->smrs)
1032 return;
1035 * SMR.ID bits may not be preserved if the corresponding MASK
1036 * bits are set, so check each one separately. We can reject
1037 * masters later if they try to claim IDs outside these masks.
1039 smr = smmu->streamid_mask << SMR_ID_SHIFT;
1040 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1041 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1042 smmu->streamid_mask = smr >> SMR_ID_SHIFT;
1044 smr = smmu->streamid_mask << SMR_MASK_SHIFT;
1045 writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
1046 smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
1047 smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
1050 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
1052 struct arm_smmu_smr *smrs = smmu->smrs;
1053 int i, free_idx = -ENOSPC;
1055 /* Stream indexing is blissfully easy */
1056 if (!smrs)
1057 return id;
1059 /* Validating SMRs is... less so */
1060 for (i = 0; i < smmu->num_mapping_groups; ++i) {
1061 if (!smrs[i].valid) {
1063 * Note the first free entry we come across, which
1064 * we'll claim in the end if nothing else matches.
1066 if (free_idx < 0)
1067 free_idx = i;
1068 continue;
1071 * If the new entry is _entirely_ matched by an existing entry,
1072 * then reuse that, with the guarantee that there also cannot
1073 * be any subsequent conflicting entries. In normal use we'd
1074 * expect simply identical entries for this case, but there's
1075 * no harm in accommodating the generalisation.
1077 if ((mask & smrs[i].mask) == mask &&
1078 !((id ^ smrs[i].id) & ~smrs[i].mask))
1079 return i;
1081 * If the new entry has any other overlap with an existing one,
1082 * though, then there always exists at least one stream ID
1083 * which would cause a conflict, and we can't allow that risk.
1085 if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1086 return -EINVAL;
1089 return free_idx;
1092 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1094 if (--smmu->s2crs[idx].count)
1095 return false;
1097 smmu->s2crs[idx] = s2cr_init_val;
1098 if (smmu->smrs)
1099 smmu->smrs[idx].valid = false;
1101 return true;
1104 static int arm_smmu_master_alloc_smes(struct device *dev)
1106 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1107 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1108 struct arm_smmu_device *smmu = cfg->smmu;
1109 struct arm_smmu_smr *smrs = smmu->smrs;
1110 struct iommu_group *group;
1111 int i, idx, ret;
1113 mutex_lock(&smmu->stream_map_mutex);
1114 /* Figure out a viable stream map entry allocation */
1115 for_each_cfg_sme(fwspec, i, idx) {
1116 u16 sid = fwspec->ids[i];
1117 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1119 if (idx != INVALID_SMENDX) {
1120 ret = -EEXIST;
1121 goto out_err;
1124 ret = arm_smmu_find_sme(smmu, sid, mask);
1125 if (ret < 0)
1126 goto out_err;
1128 idx = ret;
1129 if (smrs && smmu->s2crs[idx].count == 0) {
1130 smrs[idx].id = sid;
1131 smrs[idx].mask = mask;
1132 smrs[idx].valid = true;
1134 smmu->s2crs[idx].count++;
1135 cfg->smendx[i] = (s16)idx;
1138 group = iommu_group_get_for_dev(dev);
1139 if (!group)
1140 group = ERR_PTR(-ENOMEM);
1141 if (IS_ERR(group)) {
1142 ret = PTR_ERR(group);
1143 goto out_err;
1145 iommu_group_put(group);
1147 /* It worked! Now, poke the actual hardware */
1148 for_each_cfg_sme(fwspec, i, idx) {
1149 arm_smmu_write_sme(smmu, idx);
1150 smmu->s2crs[idx].group = group;
1153 mutex_unlock(&smmu->stream_map_mutex);
1154 return 0;
1156 out_err:
1157 while (i--) {
1158 arm_smmu_free_sme(smmu, cfg->smendx[i]);
1159 cfg->smendx[i] = INVALID_SMENDX;
1161 mutex_unlock(&smmu->stream_map_mutex);
1162 return ret;
1165 static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1167 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1168 struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1169 int i, idx;
1171 mutex_lock(&smmu->stream_map_mutex);
1172 for_each_cfg_sme(fwspec, i, idx) {
1173 if (arm_smmu_free_sme(smmu, idx))
1174 arm_smmu_write_sme(smmu, idx);
1175 cfg->smendx[i] = INVALID_SMENDX;
1177 mutex_unlock(&smmu->stream_map_mutex);
1180 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1181 struct iommu_fwspec *fwspec)
1183 struct arm_smmu_device *smmu = smmu_domain->smmu;
1184 struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1185 u8 cbndx = smmu_domain->cfg.cbndx;
1186 enum arm_smmu_s2cr_type type;
1187 int i, idx;
1189 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1190 type = S2CR_TYPE_BYPASS;
1191 else
1192 type = S2CR_TYPE_TRANS;
1194 for_each_cfg_sme(fwspec, i, idx) {
1195 if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1196 continue;
1198 s2cr[idx].type = type;
1199 s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1200 s2cr[idx].cbndx = cbndx;
1201 arm_smmu_write_s2cr(smmu, idx);
1203 return 0;
1206 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1208 int ret;
1209 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1210 struct arm_smmu_device *smmu;
1211 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1213 if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1214 dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1215 return -ENXIO;
1219 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1220 * domains between of_xlate() and add_device() - we have no way to cope
1221 * with that, so until ARM gets converted to rely on groups and default
1222 * domains, just say no (but more politely than by dereferencing NULL).
1223 * This should be at least a WARN_ON once that's sorted.
1225 if (!fwspec->iommu_priv)
1226 return -ENODEV;
1228 smmu = fwspec_smmu(fwspec);
1229 /* Ensure that the domain is finalised */
1230 ret = arm_smmu_init_domain_context(domain, smmu);
1231 if (ret < 0)
1232 return ret;
1235 * Sanity check the domain. We don't support domains across
1236 * different SMMUs.
1238 if (smmu_domain->smmu != smmu) {
1239 dev_err(dev,
1240 "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1241 dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1242 return -EINVAL;
1245 /* Looks ok, so add the device to the domain */
1246 return arm_smmu_domain_add_master(smmu_domain, fwspec);
1249 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1250 phys_addr_t paddr, size_t size, int prot)
1252 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1254 if (!ops)
1255 return -ENODEV;
1257 return ops->map(ops, iova, paddr, size, prot);
1260 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
1261 size_t size)
1263 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1265 if (!ops)
1266 return 0;
1268 return ops->unmap(ops, iova, size);
1271 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1273 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1275 if (smmu_domain->tlb_ops)
1276 smmu_domain->tlb_ops->tlb_sync(smmu_domain);
1279 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1280 dma_addr_t iova)
1282 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1283 struct arm_smmu_device *smmu = smmu_domain->smmu;
1284 struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1285 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1286 struct device *dev = smmu->dev;
1287 void __iomem *cb_base;
1288 u32 tmp;
1289 u64 phys;
1290 unsigned long va, flags;
1292 cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
1294 spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1295 /* ATS1 registers can only be written atomically */
1296 va = iova & ~0xfffUL;
1297 if (smmu->version == ARM_SMMU_V2)
1298 smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
1299 else /* Register is only 32-bit in v1 */
1300 writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
1302 if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
1303 !(tmp & ATSR_ACTIVE), 5, 50)) {
1304 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1305 dev_err(dev,
1306 "iova to phys timed out on %pad. Falling back to software table walk.\n",
1307 &iova);
1308 return ops->iova_to_phys(ops, iova);
1311 phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
1312 spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1313 if (phys & CB_PAR_F) {
1314 dev_err(dev, "translation fault!\n");
1315 dev_err(dev, "PAR = 0x%llx\n", phys);
1316 return 0;
1319 return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1322 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1323 dma_addr_t iova)
1325 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1326 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1328 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1329 return iova;
1331 if (!ops)
1332 return 0;
1334 if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1335 smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1336 return arm_smmu_iova_to_phys_hard(domain, iova);
1338 return ops->iova_to_phys(ops, iova);
1341 static bool arm_smmu_capable(enum iommu_cap cap)
1343 switch (cap) {
1344 case IOMMU_CAP_CACHE_COHERENCY:
1346 * Return true here as the SMMU can always send out coherent
1347 * requests.
1349 return true;
1350 case IOMMU_CAP_NOEXEC:
1351 return true;
1352 default:
1353 return false;
1357 static int arm_smmu_match_node(struct device *dev, void *data)
1359 return dev->fwnode == data;
1362 static
1363 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1365 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1366 fwnode, arm_smmu_match_node);
1367 put_device(dev);
1368 return dev ? dev_get_drvdata(dev) : NULL;
1371 static int arm_smmu_add_device(struct device *dev)
1373 struct arm_smmu_device *smmu;
1374 struct arm_smmu_master_cfg *cfg;
1375 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1376 int i, ret;
1378 if (using_legacy_binding) {
1379 ret = arm_smmu_register_legacy_master(dev, &smmu);
1382 * If dev->iommu_fwspec is initially NULL, arm_smmu_register_legacy_master()
1383 * will allocate/initialise a new one. Thus we need to update fwspec for
1384 * later use.
1386 fwspec = dev->iommu_fwspec;
1387 if (ret)
1388 goto out_free;
1389 } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1390 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1391 } else {
1392 return -ENODEV;
1395 ret = -EINVAL;
1396 for (i = 0; i < fwspec->num_ids; i++) {
1397 u16 sid = fwspec->ids[i];
1398 u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1400 if (sid & ~smmu->streamid_mask) {
1401 dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1402 sid, smmu->streamid_mask);
1403 goto out_free;
1405 if (mask & ~smmu->smr_mask_mask) {
1406 dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1407 mask, smmu->smr_mask_mask);
1408 goto out_free;
1412 ret = -ENOMEM;
1413 cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1414 GFP_KERNEL);
1415 if (!cfg)
1416 goto out_free;
1418 cfg->smmu = smmu;
1419 fwspec->iommu_priv = cfg;
1420 while (i--)
1421 cfg->smendx[i] = INVALID_SMENDX;
1423 ret = arm_smmu_master_alloc_smes(dev);
1424 if (ret)
1425 goto out_cfg_free;
1427 iommu_device_link(&smmu->iommu, dev);
1429 return 0;
1431 out_cfg_free:
1432 kfree(cfg);
1433 out_free:
1434 iommu_fwspec_free(dev);
1435 return ret;
1438 static void arm_smmu_remove_device(struct device *dev)
1440 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1441 struct arm_smmu_master_cfg *cfg;
1442 struct arm_smmu_device *smmu;
1445 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1446 return;
1448 cfg = fwspec->iommu_priv;
1449 smmu = cfg->smmu;
1451 iommu_device_unlink(&smmu->iommu, dev);
1452 arm_smmu_master_free_smes(fwspec);
1453 iommu_group_remove_device(dev);
1454 kfree(fwspec->iommu_priv);
1455 iommu_fwspec_free(dev);
1458 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1460 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1461 struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1462 struct iommu_group *group = NULL;
1463 int i, idx;
1465 for_each_cfg_sme(fwspec, i, idx) {
1466 if (group && smmu->s2crs[idx].group &&
1467 group != smmu->s2crs[idx].group)
1468 return ERR_PTR(-EINVAL);
1470 group = smmu->s2crs[idx].group;
1473 if (group)
1474 return iommu_group_ref_get(group);
1476 if (dev_is_pci(dev))
1477 group = pci_device_group(dev);
1478 else
1479 group = generic_device_group(dev);
1481 return group;
1484 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1485 enum iommu_attr attr, void *data)
1487 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1489 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1490 return -EINVAL;
1492 switch (attr) {
1493 case DOMAIN_ATTR_NESTING:
1494 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1495 return 0;
1496 default:
1497 return -ENODEV;
1501 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1502 enum iommu_attr attr, void *data)
1504 int ret = 0;
1505 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1507 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1508 return -EINVAL;
1510 mutex_lock(&smmu_domain->init_mutex);
1512 switch (attr) {
1513 case DOMAIN_ATTR_NESTING:
1514 if (smmu_domain->smmu) {
1515 ret = -EPERM;
1516 goto out_unlock;
1519 if (*(int *)data)
1520 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1521 else
1522 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1524 break;
1525 default:
1526 ret = -ENODEV;
1529 out_unlock:
1530 mutex_unlock(&smmu_domain->init_mutex);
1531 return ret;
1534 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1536 u32 mask, fwid = 0;
1538 if (args->args_count > 0)
1539 fwid |= (u16)args->args[0];
1541 if (args->args_count > 1)
1542 fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1543 else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1544 fwid |= (u16)mask << SMR_MASK_SHIFT;
1546 return iommu_fwspec_add_ids(dev, &fwid, 1);
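/*
* Illustrative example (not taken from the binding document): with a
* two-cell specifier such as "iommus = <&smmu 0x100 0x7>", the first cell
* becomes the Stream ID and the second the SMR mask; with a single cell, an
* optional "stream-match-mask" property on the SMMU node supplies the mask.
*/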
1549 static void arm_smmu_get_resv_regions(struct device *dev,
1550 struct list_head *head)
1552 struct iommu_resv_region *region;
1553 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1555 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1556 prot, IOMMU_RESV_SW_MSI);
1557 if (!region)
1558 return;
1560 list_add_tail(&region->list, head);
1562 iommu_dma_get_resv_regions(dev, head);
1565 static void arm_smmu_put_resv_regions(struct device *dev,
1566 struct list_head *head)
1568 struct iommu_resv_region *entry, *next;
1570 list_for_each_entry_safe(entry, next, head, list)
1571 kfree(entry);
1574 static struct iommu_ops arm_smmu_ops = {
1575 .capable = arm_smmu_capable,
1576 .domain_alloc = arm_smmu_domain_alloc,
1577 .domain_free = arm_smmu_domain_free,
1578 .attach_dev = arm_smmu_attach_dev,
1579 .map = arm_smmu_map,
1580 .unmap = arm_smmu_unmap,
1581 .flush_iotlb_all = arm_smmu_iotlb_sync,
1582 .iotlb_sync = arm_smmu_iotlb_sync,
1583 .iova_to_phys = arm_smmu_iova_to_phys,
1584 .add_device = arm_smmu_add_device,
1585 .remove_device = arm_smmu_remove_device,
1586 .device_group = arm_smmu_device_group,
1587 .domain_get_attr = arm_smmu_domain_get_attr,
1588 .domain_set_attr = arm_smmu_domain_set_attr,
1589 .of_xlate = arm_smmu_of_xlate,
1590 .get_resv_regions = arm_smmu_get_resv_regions,
1591 .put_resv_regions = arm_smmu_put_resv_regions,
1592 .pgsize_bitmap = -1UL, /* Restricted during device attach */
1595 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1597 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1598 int i;
1599 u32 reg, major;
1601 /* clear global FSR */
1602 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1603 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1606 * Reset stream mapping groups: Initial values mark all SMRn as
1607 * invalid and all S2CRn as bypass unless overridden.
1609 for (i = 0; i < smmu->num_mapping_groups; ++i)
1610 arm_smmu_write_sme(smmu, i);
1612 if (smmu->model == ARM_MMU500) {
1614 * Before clearing ARM_MMU500_ACTLR_CPRE, the CACHE_LOCK bit of
1615 * ACR must be cleared first; note that the CACHE_LOCK bit is only
1616 * present in MMU-500r2 onwards.
1618 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
1619 major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1620 reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1621 if (major >= 2)
1622 reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
1624 * Allow unmatched Stream IDs to allocate bypass
1625 * TLB entries for reduced latency.
1627 reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1628 writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
1631 /* Make sure all context banks are disabled and clear CB_FSR */
1632 for (i = 0; i < smmu->num_context_banks; ++i) {
1633 void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
1635 arm_smmu_write_context_bank(smmu, i);
1636 writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1638 * Disable MMU-500's not-particularly-beneficial next-page
1639 * prefetcher for the sake of errata #841119 and #826419.
1641 if (smmu->model == ARM_MMU500) {
1642 reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
1643 reg &= ~ARM_MMU500_ACTLR_CPRE;
1644 writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
1648 /* Invalidate the TLB, just in case */
1649 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
1650 writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
1652 reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1654 /* Enable fault reporting */
1655 reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1657 /* Disable TLB broadcasting. */
1658 reg |= (sCR0_VMIDPNE | sCR0_PTM);
1660 /* Enable client access, handling unmatched streams as appropriate */
1661 reg &= ~sCR0_CLIENTPD;
1662 if (disable_bypass)
1663 reg |= sCR0_USFCFG;
1664 else
1665 reg &= ~sCR0_USFCFG;
1667 /* Disable forced broadcasting */
1668 reg &= ~sCR0_FB;
1670 /* Don't upgrade barriers */
1671 reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1673 if (smmu->features & ARM_SMMU_FEAT_VMID16)
1674 reg |= sCR0_VMID16EN;
1676 if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1677 reg |= sCR0_EXIDENABLE;
1679 /* Push the button */
1680 arm_smmu_tlb_sync_global(smmu);
1681 writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1684 static int arm_smmu_id_size_to_bits(int size)
1686 switch (size) {
1687 case 0:
1688 return 32;
1689 case 1:
1690 return 36;
1691 case 2:
1692 return 40;
1693 case 3:
1694 return 42;
1695 case 4:
1696 return 44;
1697 case 5:
1698 default:
1699 return 48;
1703 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1705 unsigned long size;
1706 void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1707 u32 id;
1708 bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1709 int i;
1711 dev_notice(smmu->dev, "probing hardware configuration...\n");
1712 dev_notice(smmu->dev, "SMMUv%d with:\n",
1713 smmu->version == ARM_SMMU_V2 ? 2 : 1);
1715 /* ID0 */
1716 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1718 /* Restrict available stages based on module parameter */
1719 if (force_stage == 1)
1720 id &= ~(ID0_S2TS | ID0_NTS);
1721 else if (force_stage == 2)
1722 id &= ~(ID0_S1TS | ID0_NTS);
1724 if (id & ID0_S1TS) {
1725 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1726 dev_notice(smmu->dev, "\tstage 1 translation\n");
1729 if (id & ID0_S2TS) {
1730 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1731 dev_notice(smmu->dev, "\tstage 2 translation\n");
1734 if (id & ID0_NTS) {
1735 smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1736 dev_notice(smmu->dev, "\tnested translation\n");
1739 if (!(smmu->features &
1740 (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1741 dev_err(smmu->dev, "\tno translation support!\n");
1742 return -ENODEV;
1745 if ((id & ID0_S1TS) &&
1746 ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1747 smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1748 dev_notice(smmu->dev, "\taddress translation ops\n");
1752 * In order for DMA API calls to work properly, we must defer to what
1753 * the FW says about coherency, regardless of what the hardware claims.
1754 * Fortunately, this also opens up a workaround for systems where the
1755 * ID register value has ended up configured incorrectly.
1757 cttw_reg = !!(id & ID0_CTTW);
1758 if (cttw_fw || cttw_reg)
1759 dev_notice(smmu->dev, "\t%scoherent table walk\n",
1760 cttw_fw ? "" : "non-");
1761 if (cttw_fw != cttw_reg)
1762 dev_notice(smmu->dev,
1763 "\t(IDR0.CTTW overridden by FW configuration)\n");
1765 /* Max. number of entries we have for stream matching/indexing */
1766 if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
1767 smmu->features |= ARM_SMMU_FEAT_EXIDS;
1768 size = 1 << 16;
1769 } else {
1770 size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
1772 smmu->streamid_mask = size - 1;
1773 if (id & ID0_SMS) {
1774 smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1775 size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
1776 if (size == 0) {
1777 dev_err(smmu->dev,
1778 "stream-matching supported, but no SMRs present!\n");
1779 return -ENODEV;
1782 /* Zero-initialised to mark as invalid */
1783 smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1784 GFP_KERNEL);
1785 if (!smmu->smrs)
1786 return -ENOMEM;
1788 dev_notice(smmu->dev,
1789 "\tstream matching with %lu register groups", size);
1791 /* s2cr->type == 0 means translation, so initialise explicitly */
1792 smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1793 GFP_KERNEL);
1794 if (!smmu->s2crs)
1795 return -ENOMEM;
1796 for (i = 0; i < size; i++)
1797 smmu->s2crs[i] = s2cr_init_val;
1799 smmu->num_mapping_groups = size;
1800 mutex_init(&smmu->stream_map_mutex);
1801 spin_lock_init(&smmu->global_sync_lock);
1803 if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
1804 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1805 if (!(id & ID0_PTFS_NO_AARCH32S))
1806 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1809 /* ID1 */
1810 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1811 smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1813 /* Check for size mismatch of SMMU address space from mapped region */
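/*
* The global register space occupies the first half of the SMMU region and
* the context banks the second half, so cb_base should sit exactly "size"
* bytes above the global base.
*/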
1814 size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1815 size <<= smmu->pgshift;
1816 if (smmu->cb_base != gr0_base + size)
1817 dev_warn(smmu->dev,
1818 "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
1819 size * 2, (smmu->cb_base - gr0_base) * 2);
1821 smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1822 smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
1823 if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1824 dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1825 return -ENODEV;
1827 dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1828 smmu->num_context_banks, smmu->num_s2_context_banks);
1830 * Cavium CN88xx erratum #27704.
1831 * Ensure ASID and VMID allocation is unique across all SMMUs in
1832 * the system.
1834 if (smmu->model == CAVIUM_SMMUV2) {
1835 smmu->cavium_id_base =
1836 atomic_add_return(smmu->num_context_banks,
1837 &cavium_smmu_context_count);
1838 smmu->cavium_id_base -= smmu->num_context_banks;
1839 dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1841 smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1842 sizeof(*smmu->cbs), GFP_KERNEL);
1843 if (!smmu->cbs)
1844 return -ENOMEM;
1846 /* ID2 */
1847 id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
1848 size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1849 smmu->ipa_size = size;
1851 /* The output mask is also applied for bypass */
1852 size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1853 smmu->pa_size = size;
1855 if (id & ID2_VMID16)
1856 smmu->features |= ARM_SMMU_FEAT_VMID16;
1859 * What the page table walker can address actually depends on which
1860 * descriptor format is in use, but since a) we don't know that yet,
1861 * and b) it can vary per context bank, this will have to do...
1863 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1864 dev_warn(smmu->dev,
1865 "failed to set DMA mask for table walker\n");
1867 if (smmu->version < ARM_SMMU_V2) {
1868 smmu->va_size = smmu->ipa_size;
1869 if (smmu->version == ARM_SMMU_V1_64K)
1870 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1871 } else {
1872 size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1873 smmu->va_size = arm_smmu_id_size_to_bits(size);
1874 if (id & ID2_PTFS_4K)
1875 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1876 if (id & ID2_PTFS_16K)
1877 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1878 if (id & ID2_PTFS_64K)
1879 smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1882 /* Now we've corralled the various formats, what'll it do? */
1883 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1884 smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1885 if (smmu->features &
1886 (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1887 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1888 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1889 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1890 if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1891 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1893 if (arm_smmu_ops.pgsize_bitmap == -1UL)
1894 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1895 else
1896 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1897 dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1898 smmu->pgsize_bitmap);
1901 if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1902 dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1903 smmu->va_size, smmu->ipa_size);
1905 if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1906 dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1907 smmu->ipa_size, smmu->pa_size);
1909 return 0;
1912 struct arm_smmu_match_data {
1913 enum arm_smmu_arch_version version;
1914 enum arm_smmu_implementation model;
1917 #define ARM_SMMU_MATCH_DATA(name, ver, imp) \
1918 static struct arm_smmu_match_data name = { .version = ver, .model = imp }
1920 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
1921 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1922 ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1923 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1924 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1925 ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1927 static const struct of_device_id arm_smmu_of_match[] = {
1928 { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
1929 { .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
1930 { .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1931 { .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1932 { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1933 { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1934 { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
1935 { },
1936 };
1937 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
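/*
 * For reference, a generic-binding DT node for one of the compatibles
 * above typically looks like the sketch below; the address, size and
 * interrupt specifiers are placeholders, not taken from this file:
 *
 *	smmu@d0000000 {
 *		compatible = "arm,smmu-v2";
 *		reg = <0xd0000000 0x10000>;
 *		#global-interrupts = <1>;
 *		interrupts = <0 32 4>, <0 33 4>, <0 34 4>;
 *		#iommu-cells = <1>;
 *	};
 *
 * The first #global-interrupts entries are global fault interrupts and the
 * remainder are per-context-bank interrupts, which is how
 * arm_smmu_device_probe() below carves up the IRQ resources.
 */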
1939 #ifdef CONFIG_ACPI
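/*
 * Translate the IORT node's model field into the version/implementation
 * pair used by the rest of the driver; unrecognised models are refused
 * rather than guessed at.
 */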
1940 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1941 {
1942 int ret = 0;
1944 switch (model) {
1945 case ACPI_IORT_SMMU_V1:
1946 case ACPI_IORT_SMMU_CORELINK_MMU400:
1947 smmu->version = ARM_SMMU_V1;
1948 smmu->model = GENERIC_SMMU;
1949 break;
1950 case ACPI_IORT_SMMU_CORELINK_MMU401:
1951 smmu->version = ARM_SMMU_V1_64K;
1952 smmu->model = GENERIC_SMMU;
1953 break;
1954 case ACPI_IORT_SMMU_V2:
1955 smmu->version = ARM_SMMU_V2;
1956 smmu->model = GENERIC_SMMU;
1957 break;
1958 case ACPI_IORT_SMMU_CORELINK_MMU500:
1959 smmu->version = ARM_SMMU_V2;
1960 smmu->model = ARM_MMU500;
1961 break;
1962 case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1963 smmu->version = ARM_SMMU_V2;
1964 smmu->model = CAVIUM_SMMUV2;
1965 break;
1966 default:
1967 ret = -ENODEV;
1968 }
1970 return ret;
1971 }
1973 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1974 struct arm_smmu_device *smmu)
1975 {
1976 struct device *dev = smmu->dev;
1977 struct acpi_iort_node *node =
1978 *(struct acpi_iort_node **)dev_get_platdata(dev);
1979 struct acpi_iort_smmu *iort_smmu;
1980 int ret;
1982 /* Retrieve SMMU1/2 specific data */
1983 iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1985 ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1986 if (ret < 0)
1987 return ret;
1989 /* Ignore the configuration access interrupt */
1990 smmu->num_global_irqs = 1;
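/*
 * ACPI_IORT_SMMU_COHERENT_WALK is the IORT equivalent of the DT
 * "dma-coherent" property: table walks snoop the CPU caches, so the
 * io-pgtable code need not perform explicit cache maintenance.
 */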
1992 if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1993 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1995 return 0;
1996 }
1997 #else
1998 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
1999 struct arm_smmu_device *smmu)
2000 {
2001 return -ENODEV;
2002 }
2003 #endif
2005 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2006 struct arm_smmu_device *smmu)
2007 {
2008 const struct arm_smmu_match_data *data;
2009 struct device *dev = &pdev->dev;
2010 bool legacy_binding;
2012 if (of_property_read_u32(dev->of_node, "#global-interrupts",
2013 &smmu->num_global_irqs)) {
2014 dev_err(dev, "missing #global-interrupts property\n");
2015 return -ENODEV;
2016 }
2018 data = of_device_get_match_data(dev);
2019 smmu->version = data->version;
2020 smmu->model = data->model;
2022 parse_driver_options(smmu);
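/*
 * The driver supports the legacy "mmu-masters" binding and the generic
 * "iommus" binding, but not both at once: whichever style is seen first
 * wins system-wide, and a later SMMU described the other way is refused.
 */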
2024 legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2025 if (legacy_binding && !using_generic_binding) {
2026 if (!using_legacy_binding)
2027 pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
2028 using_legacy_binding = true;
2029 } else if (!legacy_binding && !using_legacy_binding) {
2030 using_generic_binding = true;
2031 } else {
2032 dev_err(dev, "not probing due to mismatched DT properties\n");
2033 return -ENODEV;
2034 }
2036 if (of_dma_is_coherent(dev->of_node))
2037 smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2039 return 0;
2040 }
2042 static void arm_smmu_bus_init(void)
2043 {
2044 /* Oh, for a proper bus abstraction */
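/*
 * bus_set_iommu() attaches arm_smmu_ops to each bus type so that devices
 * enumerated later receive add_device() callbacks, and pci_request_acs()
 * asks for PCIe ACS so endpoints can be isolated into separate IOMMU
 * groups.
 */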
2045 if (!iommu_present(&platform_bus_type))
2046 bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2047 #ifdef CONFIG_ARM_AMBA
2048 if (!iommu_present(&amba_bustype))
2049 bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2050 #endif
2051 #ifdef CONFIG_PCI
2052 if (!iommu_present(&pci_bus_type)) {
2053 pci_request_acs();
2054 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2055 }
2056 #endif
2057 }
2059 static int arm_smmu_device_probe(struct platform_device *pdev)
2060 {
2061 struct resource *res;
2062 resource_size_t ioaddr;
2063 struct arm_smmu_device *smmu;
2064 struct device *dev = &pdev->dev;
2065 int num_irqs, i, err;
2067 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2068 if (!smmu) {
2069 dev_err(dev, "failed to allocate arm_smmu_device\n");
2070 return -ENOMEM;
2071 }
2072 smmu->dev = dev;
2074 if (dev->of_node)
2075 err = arm_smmu_device_dt_probe(pdev, smmu);
2076 else
2077 err = arm_smmu_device_acpi_probe(pdev, smmu);
2079 if (err)
2080 return err;
2082 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2083 ioaddr = res->start;
2084 smmu->base = devm_ioremap_resource(dev, res);
2085 if (IS_ERR(smmu->base))
2086 return PTR_ERR(smmu->base);
2087 smmu->cb_base = smmu->base + resource_size(res) / 2;
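/*
 * IRQ resources are laid out as the global fault interrupts followed by
 * one interrupt per context bank, so everything beyond num_global_irqs
 * is counted as a context interrupt.
 */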
2089 num_irqs = 0;
2090 while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
2091 num_irqs++;
2092 if (num_irqs > smmu->num_global_irqs)
2093 smmu->num_context_irqs++;
2094 }
2096 if (!smmu->num_context_irqs) {
2097 dev_err(dev, "found %d interrupts but expected at least %d\n",
2098 num_irqs, smmu->num_global_irqs + 1);
2099 return -ENODEV;
2100 }
2102 smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2103 GFP_KERNEL);
2104 if (!smmu->irqs) {
2105 dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
2106 return -ENOMEM;
2107 }
2109 for (i = 0; i < num_irqs; ++i) {
2110 int irq = platform_get_irq(pdev, i);
2112 if (irq < 0) {
2113 dev_err(dev, "failed to get irq index %d\n", i);
2114 return -ENODEV;
2115 }
2116 smmu->irqs[i] = irq;
2117 }
2119 err = arm_smmu_device_cfg_probe(smmu);
2120 if (err)
2121 return err;
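/*
 * SMMUv2 dedicates an interrupt to each context bank, so make sure the
 * firmware described enough of them before fault handlers are wired up;
 * any extra context interrupts are simply ignored.
 */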
2123 if (smmu->version == ARM_SMMU_V2) {
2124 if (smmu->num_context_banks > smmu->num_context_irqs) {
2125 dev_err(dev,
2126 "found only %d context irq(s) but %d required\n",
2127 smmu->num_context_irqs, smmu->num_context_banks);
2128 return -ENODEV;
2129 }
2131 /* Ignore superfluous interrupts */
2132 smmu->num_context_irqs = smmu->num_context_banks;
2133 }
2135 for (i = 0; i < smmu->num_global_irqs; ++i) {
2136 err = devm_request_irq(smmu->dev, smmu->irqs[i],
2137 arm_smmu_global_fault,
2138 IRQF_SHARED,
2139 "arm-smmu global fault",
2140 smmu);
2141 if (err) {
2142 dev_err(dev, "failed to request global IRQ %d (%u)\n",
2143 i, smmu->irqs[i]);
2144 return err;
2145 }
2146 }
2148 err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2149 "smmu.%pa", &ioaddr);
2150 if (err) {
2151 dev_err(dev, "Failed to register iommu in sysfs\n");
2152 return err;
2153 }
2155 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2156 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2158 err = iommu_device_register(&smmu->iommu);
2159 if (err) {
2160 dev_err(dev, "Failed to register iommu\n");
2161 return err;
2162 }
2164 platform_set_drvdata(pdev, smmu);
2165 arm_smmu_device_reset(smmu);
2166 arm_smmu_test_smr_masks(smmu);
2168 /*
2169 * For ACPI and generic DT bindings, an SMMU will be probed before
2170 * any device which might need it, so we want the bus ops in place
2171 * ready to handle default domain setup as soon as any SMMU exists.
2172 */
2173 if (!using_legacy_binding)
2174 arm_smmu_bus_init();
2176 return 0;
2177 }
2179 /*
2180 * With the legacy DT binding in play, though, we have no guarantees about
2181 * probe order, but then we're also not doing default domains, so we can
2182 * delay setting bus ops until we're sure every possible SMMU is ready,
2183 * and that way ensure that no add_device() calls get missed.
2184 */
2185 static int arm_smmu_legacy_bus_init(void)
2186 {
2187 if (using_legacy_binding)
2188 arm_smmu_bus_init();
2189 return 0;
2190 }
2191 device_initcall_sync(arm_smmu_legacy_bus_init);
2193 static int arm_smmu_device_remove(struct platform_device *pdev)
2194 {
2195 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2197 if (!smmu)
2198 return -ENODEV;
2200 if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2201 dev_err(&pdev->dev, "removing device with active domains!\n");
2203 /* Turn the thing off */
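/*
 * Setting sCR0.CLIENTPD disables the SMMU's client ports, so incoming
 * transactions are no longer translated.
 */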
2204 writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2205 return 0;
2206 }
2208 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2209 {
2210 arm_smmu_device_remove(pdev);
2211 }
2213 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2214 {
2215 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2217 arm_smmu_device_reset(smmu);
2218 return 0;
2219 }
2221 static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
2223 static struct platform_driver arm_smmu_driver = {
2224 .driver = {
2225 .name = "arm-smmu",
2226 .of_match_table = of_match_ptr(arm_smmu_of_match),
2227 .pm = &arm_smmu_pm_ops,
2228 },
2229 .probe = arm_smmu_device_probe,
2230 .remove = arm_smmu_device_remove,
2231 .shutdown = arm_smmu_device_shutdown,
2232 };
2233 module_platform_driver(arm_smmu_driver);
2235 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2236 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2237 MODULE_LICENSE("GPL v2");