// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2019-2020 NVIDIA CORPORATION. All rights reserved.

#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

#include <soc/tegra/mc.h>

#include "arm-smmu.h"

/*
 * Tegra194 has three ARM MMU-500 instances.
 * Two of them are used together and must be programmed identically for
 * interleaved IOVA accesses across them and translate accesses from
 * non-isochronous HW devices.
 * The third one is used for translating accesses from isochronous HW devices.
 *
 * In addition, the SMMU driver needs to coordinate with the memory controller
 * driver to ensure that the right SID override is programmed for any given
 * memory client. This is necessary to allow for use-cases such as seamlessly
 * handing over the display controller configuration from the firmware to the
 * kernel.
 *
 * This implementation supports programming of the two instances that must
 * be programmed identically and takes care of invoking the memory controller
 * driver for SID override programming after devices have been attached to an
 * SMMU instance.
 */
#define MAX_SMMU_INSTANCES 2

struct nvidia_smmu {
	struct arm_smmu_device smmu;
	void __iomem *bases[MAX_SMMU_INSTANCES];
	unsigned int num_instances;
	struct tegra_mc *mc;
};

static inline struct nvidia_smmu *to_nvidia_smmu(struct arm_smmu_device *smmu)
{
	return container_of(smmu, struct nvidia_smmu, smmu);
}

static inline void __iomem *nvidia_smmu_page(struct arm_smmu_device *smmu,
					     unsigned int inst, int page)
{
	struct nvidia_smmu *nvidia_smmu;

	nvidia_smmu = container_of(smmu, struct nvidia_smmu, smmu);
	return nvidia_smmu->bases[inst] + (page << smmu->pgshift);
}
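
/*
 * Register reads are served from instance 0 only: the instances are required
 * to be programmed identically (see the comment at the top of this file), so
 * their register state is expected to match.
 */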
static u32 nvidia_smmu_read_reg(struct arm_smmu_device *smmu,
				int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readl_relaxed(reg);
}
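
/*
 * Register writes are mirrored to every instance so that the SMMUs stay
 * programmed identically.
 */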
static void nvidia_smmu_write_reg(struct arm_smmu_device *smmu,
				  int page, int offset, u32 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writel_relaxed(val, reg);
	}
}

static u64 nvidia_smmu_read_reg64(struct arm_smmu_device *smmu,
				  int page, int offset)
{
	void __iomem *reg = nvidia_smmu_page(smmu, 0, page) + offset;

	return readq_relaxed(reg);
}

static void nvidia_smmu_write_reg64(struct arm_smmu_device *smmu,
				    int page, int offset, u64 val)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		void __iomem *reg = nvidia_smmu_page(smmu, i, page) + offset;

		writeq_relaxed(val, reg);
	}
}
static void nvidia_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				 int sync, int status)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int delay;

	arm_smmu_writel(smmu, page, sync, 0);

	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		unsigned int spin_cnt;

		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			u32 val = 0;
			unsigned int i;

			for (i = 0; i < nvidia->num_instances; i++) {
				void __iomem *reg;

				reg = nvidia_smmu_page(smmu, i, page) + status;
				val |= readl_relaxed(reg);
			}

			if (!(val & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;

			cpu_relax();
		}

		udelay(delay);
	}

	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static int nvidia_smmu_reset(struct arm_smmu_device *smmu)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	unsigned int i;

	for (i = 0; i < nvidia->num_instances; i++) {
		u32 val;
		void __iomem *reg = nvidia_smmu_page(smmu, i, ARM_SMMU_GR0) +
				    ARM_SMMU_GR0_sGFSR;

		/* clear global FSR */
		val = readl_relaxed(reg);
		writel_relaxed(val, reg);
	}

	return 0;
}
static irqreturn_t nvidia_smmu_global_fault_inst(int irq,
						 struct arm_smmu_device *smmu,
						 int inst)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	void __iomem *gr0_base = nvidia_smmu_page(smmu, inst, 0);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	if (!gfsr)
		return IRQ_NONE;

	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	dev_err_ratelimited(smmu->dev,
			    "Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
			    "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			    gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel_relaxed(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}
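
/* The global fault IRQ line is shared, so check each instance in turn. */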
static irqreturn_t nvidia_smmu_global_fault(int irq, void *dev)
{
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu = dev;
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		irq_ret = nvidia_smmu_global_fault_inst(irq, smmu, inst);
		if (irq_ret == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}

	return ret;
}
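
/*
 * Check a single context bank on a single instance and report/clear any
 * pending fault.
 */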
static irqreturn_t nvidia_smmu_context_fault_bank(int irq,
						  struct arm_smmu_device *smmu,
						  int idx, int inst)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	void __iomem *gr1_base = nvidia_smmu_page(smmu, inst, 1);
	void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx);

	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_CB_FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
	cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(idx));

	dev_err_ratelimited(smmu->dev,
			    "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	writel_relaxed(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}
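
/*
 * A context fault may originate from any instance, so walk every context
 * bank of every instance when the context fault IRQ fires.
 */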
static irqreturn_t nvidia_smmu_context_fault(int irq, void *dev)
{
	int idx;
	unsigned int inst;
	irqreturn_t ret = IRQ_NONE;
	struct arm_smmu_device *smmu;
	struct arm_smmu_domain *smmu_domain = dev;
	struct nvidia_smmu *nvidia;

	smmu = smmu_domain->smmu;
	nvidia = to_nvidia_smmu(smmu);

	for (inst = 0; inst < nvidia->num_instances; inst++) {
		irqreturn_t irq_ret;

		/*
		 * Interrupt line is shared between all contexts.
		 * Check for faults across all contexts.
		 */
		for (idx = 0; idx < smmu->num_context_banks; idx++) {
			irq_ret = nvidia_smmu_context_fault_bank(irq, smmu,
								 idx, inst);
			if (irq_ret == IRQ_HANDLED)
				ret = IRQ_HANDLED;
		}
	}

	return ret;
}
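
/*
 * Once a device has been attached, tell the memory controller driver so that
 * the correct SID override can be programmed for that memory client.
 */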
static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct device *dev)
{
	struct nvidia_smmu *nvidia = to_nvidia_smmu(smmu);
	int err;

	err = tegra_mc_probe_device(nvidia->mc, dev);
	if (err < 0)
		dev_err(smmu->dev, "memory controller probe failed for %s: %d\n",
			dev_name(dev), err);
}

static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain,
				    struct io_pgtable_cfg *pgtbl_cfg,
				    struct device *dev)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	const struct device_node *np = smmu->dev->of_node;

	/*
	 * Tegra194 and Tegra234 SoCs have an erratum that causes walk cache
	 * entries to not be invalidated correctly. The problem is that the
	 * walk cache index generated for an IOVA is not the same across
	 * translation and invalidation requests. This leads to page faults
	 * when a PMD entry is released during unmap and populated with a new
	 * PTE table during a subsequent map request. Disabling large page
	 * mappings avoids the release of the PMD entry, so translations never
	 * see a stale PMD entry in the walk cache.
	 * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and
	 * Tegra234.
	 */
	if (of_device_is_compatible(np, "nvidia,tegra234-smmu") ||
	    of_device_is_compatible(np, "nvidia,tegra194-smmu")) {
		smmu->pgsize_bitmap &= GENMASK(PAGE_SHIFT, 0);
		pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap;
	}

	return 0;
}

static const struct arm_smmu_impl nvidia_smmu_impl = {
	.read_reg = nvidia_smmu_read_reg,
	.write_reg = nvidia_smmu_write_reg,
	.read_reg64 = nvidia_smmu_read_reg64,
	.write_reg64 = nvidia_smmu_write_reg64,
	.reset = nvidia_smmu_reset,
	.tlb_sync = nvidia_smmu_tlb_sync,
	.global_fault = nvidia_smmu_global_fault,
	.context_fault = nvidia_smmu_context_fault,
	.probe_finalize = nvidia_smmu_probe_finalize,
	.init_context = nvidia_smmu_init_context,
};
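
/*
 * With a single instance there is nothing to mirror, so the generic arm-smmu
 * register accessors are used and only the Tegra-specific hooks are kept.
 */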
static const struct arm_smmu_impl nvidia_smmu_single_impl = {
	.probe_finalize = nvidia_smmu_probe_finalize,
	.init_context = nvidia_smmu_init_context,
};
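
/*
 * Set up the NVIDIA-specific SMMU implementation: enlarge the arm_smmu_device
 * allocation into a nvidia_smmu, look up the memory controller, and map any
 * additional SMMU instances provided as MMIO resources.
 */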
struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
{
	struct resource *res;
	struct device *dev = smmu->dev;
	struct nvidia_smmu *nvidia_smmu;
	struct platform_device *pdev = to_platform_device(dev);
	unsigned int i;

	nvidia_smmu = devm_krealloc(dev, smmu, sizeof(*nvidia_smmu), GFP_KERNEL);
	if (!nvidia_smmu)
		return ERR_PTR(-ENOMEM);

	nvidia_smmu->mc = devm_tegra_memory_controller_get(dev);
	if (IS_ERR(nvidia_smmu->mc))
		return ERR_CAST(nvidia_smmu->mc);

	/* Instance 0 is ioremapped by arm-smmu.c. */
	nvidia_smmu->bases[0] = smmu->base;
	nvidia_smmu->num_instances++;

	for (i = 1; i < MAX_SMMU_INSTANCES; i++) {
		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
		if (!res)
			break;

		nvidia_smmu->bases[i] = devm_ioremap_resource(dev, res);
		if (IS_ERR(nvidia_smmu->bases[i]))
			return ERR_CAST(nvidia_smmu->bases[i]);

		nvidia_smmu->num_instances++;
	}

	if (nvidia_smmu->num_instances == 1)
		nvidia_smmu->smmu.impl = &nvidia_smmu_single_impl;
	else
		nvidia_smmu->smmu.impl = &nvidia_smmu_impl;

	return &nvidia_smmu->smmu;
}