/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */
#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"
#include "arm-smmu-regs.h"
#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128
/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
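/*
 * Note: the global register space is laid out as numbered pages of
 * (1 << pgshift) bytes each, so with the common 4K layout (pgshift == 12)
 * GR1 sits one 4K page above GR0. The shift is kept per-device rather
 * than as a constant because the ID1.PAGESIZE bit probed below selects
 * between a 4K and a 64K register page size.
 */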
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))
/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif
/* Translation context bank */
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
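/*
 * MSI_IOVA_BASE/LENGTH describe the fixed 1MB window of IOVA space that
 * arm_smmu_get_resv_regions() below reports as an IOMMU_RESV_SW_MSI
 * region, i.e. the range software is expected to use for mapping MSI
 * doorbells into a DMA-managed domain.
 */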
static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
};
struct arm_smmu_s2cr {
	struct iommu_group		*group;
	int				count;
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}
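/*
 * s2cr_init_val is a C99 compound literal: every field not named in the
 * initialiser (group, count, privcfg, cbndx) is implicitly zeroed, so
 * assigning it both resets an entry's bookkeeping and picks fault or
 * bypass behaviour for unconfigured streams according to disable_bypass.
 */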
struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_cb {
	u64				ttbr[2];
	u32				tcr[2];
	u32				mair[2];
	struct arm_smmu_cfg		*cfg;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	s16				smendx[];
};
#define INVALID_SMENDX			-1
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
#define for_each_cfg_sme(fw, i, idx) \
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
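/*
 * for_each_cfg_sme() relies on the comma operator: on every iteration,
 * including the final failing one, idx is refreshed from
 * fwspec_smendx(fw, i) before the i < fw->num_ids test runs, and an
 * out-of-range i safely yields INVALID_SMENDX rather than reading past
 * the end of the smendx[] array.
 */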
struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;

	u32				cavium_id_base; /* Specific to Cavium */

	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	union {
		u16			asid;
		u16			vmid;
	};
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	const struct iommu_gather_ops	*tlb_ops;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	struct mutex			init_mutex; /* Protects smmu pointer */
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static bool using_legacy_binding, using_generic_binding;

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}
static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;

static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;

	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}
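/*
 * The find/test_and_set loop above is the usual lock-free bitmap
 * allocator: find_next_zero_bit() is only a hint, so if another caller
 * races in and claims the same bit first, test_and_set_bit() fails and
 * the search simply retries from the same starting point.
 */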
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
{
	unsigned int spin_cnt, delay;

	writel_relaxed(0, sync);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_sync_vmid(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
	arm_smmu_tlb_sync_global(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}

/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);

	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TTBCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}
static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
	void __iomem *cb_base, *gr1_base;

	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= cfg->vmid << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= cfg->vmid << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));

	/*
	 * TTBCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	} else {
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= smmu_domain->tlb_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = smmu->streamid_mask << SMR_ID_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = smr >> SMR_ID_SHIFT;

	smr = smmu->streamid_mask << SMR_MASK_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
}
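/*
 * This is a write-ones-then-read-back probe: only the SMR bits the
 * hardware actually implements hold the value written, so for example
 * an implementation with 10 stream ID bits would hand back 0x3ff in
 * streamid_mask regardless of how wide a pattern was written.
 */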
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}

	return free_idx;
}
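/*
 * Worked example of the matching arithmetic above: an existing entry
 * {id = 0x400, mask = 0xff} matches stream IDs 0x400-0x4ff. A new
 * request {id = 0x420, mask = 0x0f} passes both tests -
 * (0x0f & 0xff) == 0x0f and (0x400 ^ 0x420) & ~0xff == 0 - so it reuses
 * that entry, whereas {id = 0x4f0, mask = 0x1ff} is not a subset yet
 * still overlaps, and is rejected with -EINVAL.
 */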
static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(fwspec, i, idx) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(fwspec, i, idx) {
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

	smmu = fwspec_smmu(fwspec);
	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu);
	if (ret < 0)
		return ret;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		return -EINVAL;
	}

	/* Looks ok, so add the device to the domain */
	return arm_smmu_domain_add_master(smmu_domain, fwspec);
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	return ops->map(ops, iova, paddr, size, prot);
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	if (!ops)
		return 0;

	return ops->unmap(ops, iova, size);
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (smmu_domain->tlb_ops)
		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;

	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					 dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}
static int arm_smmu_match_node(struct device *dev, void *data)
{
	return dev->fwnode == data;
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
						fwnode, arm_smmu_match_node);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}
static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initially NULL,
		 * arm_smmu_register_legacy_master() will allocate/initialise
		 * a new one. Thus we need to update fwspec for later use.
		 */
		fwspec = dev->iommu_fwspec;
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return -ENODEV;
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_master_alloc_smes(dev);
	if (ret)
		goto out_cfg_free;

	iommu_device_link(&smmu->iommu, dev);

	return 0;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ret;
}
static void arm_smmu_remove_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;

	if (!fwspec || fwspec->ops != &arm_smmu_ops)
		return;

	cfg = fwspec->iommu_priv;
	smmu = cfg->smmu;

	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_master_free_smes(fwspec);
	iommu_group_remove_device(dev);
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct iommu_group *group = NULL;
	int i, idx;

	for_each_cfg_sme(fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
		return iommu_group_ref_get(group);

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
		return 0;
	default:
		return -ENODEV;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (domain->type != IOMMU_DOMAIN_UNMANAGED)
		return -EINVAL;

	mutex_lock(&smmu_domain->init_mutex);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		if (smmu_domain->smmu) {
			ret = -EPERM;
			goto out_unlock;
		}

		if (*(int *)data)
			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
		else
			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

		break;
	default:
		ret = -ENODEV;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= (u16)args->args[0];

	if (args->args_count > 1)
		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= (u16)mask << SMR_MASK_SHIFT;

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}
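/*
 * The resulting firmware ID packs a stream ID in the low halfword and an
 * optional SMR mask above SMR_MASK_SHIFT. For example, a DT entry along
 * the lines of "iommus = <&smmu 0x400 0xff>" (the second cell being the
 * optional mask) would be flattened here into a single fwid of
 * 0x00ff0400 before being added to the fwspec.
 */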
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *entry, *next;

	list_for_each_entry_safe(entry, next, head, list)
		kfree(entry);
}
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.map_sg			= default_iommu_map_sg,
	.flush_iotlb_all	= arm_smmu_iotlb_sync,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}
static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}
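/*
 * This decodes the address-size encodings read from ID2 below (the IAS,
 * OAS and UBS fields) into a bit count: e.g. a raw field value of 2
 * means 40-bit addresses, and anything from 5 upwards saturates at the
 * 48 bits this driver supports.
 */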
static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	}
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups", size);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;
	mutex_init(&smmu->stream_map_mutex);
	spin_lock_init(&smmu->global_sync_lock);

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size <<= smmu->pgshift;
	if (smmu->cb_base != gr0_base + size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
			size * 2, (smmu->cb_base - gr0_base) * 2);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
	}
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1922 #ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMUv1/v2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = &pdev->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);
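
	/*
	 * The legacy "mmu-masters" binding and the generic "iommus" binding
	 * are mutually exclusive system-wide: whichever style is seen first
	 * wins, and an SMMU described the other way refuses to probe.
	 */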
	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}
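
	/*
	 * "dma-coherent" on the SMMU node describes the page table walker
	 * interface: when present, table walks snoop the CPU caches, so
	 * the io-pgtable code can skip explicit cache maintenance.
	 */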
	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}

static void arm_smmu_bus_init(void)
{
	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif
#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
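		/*
		 * Ask the PCI core to enable ACS where available, so that
		 * peer-to-peer traffic is forced upstream and devices can
		 * be isolated into their own IOMMU groups.
		 */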
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif
}

static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

	if (err)
		return err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	ioaddr = res->start;
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
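
	/*
	 * The architected SMMU layout splits the register space in half:
	 * global registers in the bottom half, translation context banks
	 * in the top, which is why cb_base falls out of the resource size.
	 */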
	smmu->cb_base = smmu->base + resource_size(res) / 2;

	num_irqs = 0;
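	/*
	 * By convention the first #global-interrupts entries in the IRQ
	 * list are global fault/configuration interrupts; everything after
	 * those is a per-context-bank interrupt.
	 */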
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2 &&
	    smmu->num_context_banks != smmu->num_context_irqs) {
		dev_err(dev,
			"found only %d context interrupt(s) but %d required\n",
			smmu->num_context_irqs, smmu->num_context_banks);
		return -ENODEV;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

	return 0;
}

/*
 * With the legacy DT binding in play, though, we have no guarantees about
 * probe order, but then we're also not doing default domains, so we can
 * delay setting bus ops until we're sure every possible SMMU is ready,
 * and that way ensure that no add_device() calls get missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	if (using_legacy_binding)
		arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	arm_smmu_device_reset(smmu);
	return 0;
}

static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
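
/*
 * Only a resume callback is supplied (the suspend slot is NULL):
 * presumably all hardware state worth keeping is mirrored in the
 * arm_smmu_device structure, so a full arm_smmu_device_reset() on
 * resume is enough to reprogram the SMMU from scratch.
 */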

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu",
		.of_match_table	= of_match_ptr(arm_smmu_of_match),
		.pm		= &arm_smmu_pm_ops,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");