1 /*
2 * IOMMU API for ARM architected SMMUv3 implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright (C) 2015 ARM Limited
18 * Author: Will Deacon <will.deacon@arm.com>
20 * This driver is powered by bad coffee and bombay mix.
23 #include <linux/acpi.h>
24 #include <linux/acpi_iort.h>
25 #include <linux/bitfield.h>
26 #include <linux/bitops.h>
27 #include <linux/delay.h>
28 #include <linux/dma-iommu.h>
29 #include <linux/err.h>
30 #include <linux/interrupt.h>
31 #include <linux/iommu.h>
32 #include <linux/iopoll.h>
33 #include <linux/module.h>
34 #include <linux/msi.h>
35 #include <linux/of.h>
36 #include <linux/of_address.h>
37 #include <linux/of_iommu.h>
38 #include <linux/of_platform.h>
39 #include <linux/pci.h>
40 #include <linux/platform_device.h>
42 #include <linux/amba/bus.h>
44 #include "io-pgtable.h"
46 /* MMIO registers */
47 #define ARM_SMMU_IDR0 0x0
48 #define IDR0_ST_LVL GENMASK(28, 27)
49 #define IDR0_ST_LVL_2LVL 1
50 #define IDR0_STALL_MODEL GENMASK(25, 24)
51 #define IDR0_STALL_MODEL_STALL 0
52 #define IDR0_STALL_MODEL_FORCE 2
53 #define IDR0_TTENDIAN GENMASK(22, 21)
54 #define IDR0_TTENDIAN_MIXED 0
55 #define IDR0_TTENDIAN_LE 2
56 #define IDR0_TTENDIAN_BE 3
57 #define IDR0_CD2L (1 << 19)
58 #define IDR0_VMID16 (1 << 18)
59 #define IDR0_PRI (1 << 16)
60 #define IDR0_SEV (1 << 14)
61 #define IDR0_MSI (1 << 13)
62 #define IDR0_ASID16 (1 << 12)
63 #define IDR0_ATS (1 << 10)
64 #define IDR0_HYP (1 << 9)
65 #define IDR0_COHACC (1 << 4)
66 #define IDR0_TTF GENMASK(3, 2)
67 #define IDR0_TTF_AARCH64 2
68 #define IDR0_TTF_AARCH32_64 3
69 #define IDR0_S1P (1 << 1)
70 #define IDR0_S2P (1 << 0)
72 #define ARM_SMMU_IDR1 0x4
73 #define IDR1_TABLES_PRESET (1 << 30)
74 #define IDR1_QUEUES_PRESET (1 << 29)
75 #define IDR1_REL (1 << 28)
76 #define IDR1_CMDQS GENMASK(25, 21)
77 #define IDR1_EVTQS GENMASK(20, 16)
78 #define IDR1_PRIQS GENMASK(15, 11)
79 #define IDR1_SSIDSIZE GENMASK(10, 6)
80 #define IDR1_SIDSIZE GENMASK(5, 0)
82 #define ARM_SMMU_IDR5 0x14
83 #define IDR5_STALL_MAX GENMASK(31, 16)
84 #define IDR5_GRAN64K (1 << 6)
85 #define IDR5_GRAN16K (1 << 5)
86 #define IDR5_GRAN4K (1 << 4)
87 #define IDR5_OAS GENMASK(2, 0)
88 #define IDR5_OAS_32_BIT 0
89 #define IDR5_OAS_36_BIT 1
90 #define IDR5_OAS_40_BIT 2
91 #define IDR5_OAS_42_BIT 3
92 #define IDR5_OAS_44_BIT 4
93 #define IDR5_OAS_48_BIT 5
94 #define IDR5_OAS_52_BIT 6
95 #define IDR5_VAX GENMASK(11, 10)
96 #define IDR5_VAX_52_BIT 1
98 #define ARM_SMMU_CR0 0x20
99 #define CR0_CMDQEN (1 << 3)
100 #define CR0_EVTQEN (1 << 2)
101 #define CR0_PRIQEN (1 << 1)
102 #define CR0_SMMUEN (1 << 0)
104 #define ARM_SMMU_CR0ACK 0x24
106 #define ARM_SMMU_CR1 0x28
107 #define CR1_TABLE_SH GENMASK(11, 10)
108 #define CR1_TABLE_OC GENMASK(9, 8)
109 #define CR1_TABLE_IC GENMASK(7, 6)
110 #define CR1_QUEUE_SH GENMASK(5, 4)
111 #define CR1_QUEUE_OC GENMASK(3, 2)
112 #define CR1_QUEUE_IC GENMASK(1, 0)
113 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
114 #define CR1_CACHE_NC 0
115 #define CR1_CACHE_WB 1
116 #define CR1_CACHE_WT 2
118 #define ARM_SMMU_CR2 0x2c
119 #define CR2_PTM (1 << 2)
120 #define CR2_RECINVSID (1 << 1)
121 #define CR2_E2H (1 << 0)
123 #define ARM_SMMU_GBPA 0x44
124 #define GBPA_UPDATE (1 << 31)
125 #define GBPA_ABORT (1 << 20)
127 #define ARM_SMMU_IRQ_CTRL 0x50
128 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
129 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
130 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
132 #define ARM_SMMU_IRQ_CTRLACK 0x54
134 #define ARM_SMMU_GERROR 0x60
135 #define GERROR_SFM_ERR (1 << 8)
136 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
137 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
138 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
139 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
140 #define GERROR_PRIQ_ABT_ERR (1 << 3)
141 #define GERROR_EVTQ_ABT_ERR (1 << 2)
142 #define GERROR_CMDQ_ERR (1 << 0)
143 #define GERROR_ERR_MASK 0xfd
145 #define ARM_SMMU_GERRORN 0x64
147 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
148 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
149 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
151 #define ARM_SMMU_STRTAB_BASE 0x80
152 #define STRTAB_BASE_RA (1UL << 62)
153 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
155 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
156 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
157 #define STRTAB_BASE_CFG_FMT_LINEAR 0
158 #define STRTAB_BASE_CFG_FMT_2LVL 1
159 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
160 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
162 #define ARM_SMMU_CMDQ_BASE 0x90
163 #define ARM_SMMU_CMDQ_PROD 0x98
164 #define ARM_SMMU_CMDQ_CONS 0x9c
166 #define ARM_SMMU_EVTQ_BASE 0xa0
167 #define ARM_SMMU_EVTQ_PROD 0x100a8
168 #define ARM_SMMU_EVTQ_CONS 0x100ac
169 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
170 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
171 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
173 #define ARM_SMMU_PRIQ_BASE 0xc0
174 #define ARM_SMMU_PRIQ_PROD 0x100c8
175 #define ARM_SMMU_PRIQ_CONS 0x100cc
176 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
177 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
178 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
180 /* Common MSI config fields */
181 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
182 #define MSI_CFG2_SH GENMASK(5, 4)
183 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
185 /* Common memory attribute values */
186 #define ARM_SMMU_SH_NSH 0
187 #define ARM_SMMU_SH_OSH 2
188 #define ARM_SMMU_SH_ISH 3
189 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
190 #define ARM_SMMU_MEMATTR_OIWB 0xf
192 #define Q_IDX(q, p) ((p) & ((1 << (q)->max_n_shift) - 1))
193 #define Q_WRP(q, p) ((p) & (1 << (q)->max_n_shift))
194 #define Q_OVERFLOW_FLAG (1 << 31)
195 #define Q_OVF(q, p) ((p) & Q_OVERFLOW_FLAG)
196 #define Q_ENT(q, p) ((q)->base + \
197 Q_IDX(q, p) * (q)->ent_dwords)
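/*
 * A prod/cons value packs three things into one 32-bit word: the entry
 * index in bits [max_n_shift-1:0], a wrap bit at position max_n_shift,
 * and the overflow flag in bit 31. Comparing index and wrap together is
 * what lets queue_full() and queue_empty() tell a full queue apart from
 * an empty one.
 */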
199 #define Q_BASE_RWA (1UL << 62)
200 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
201 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
204 * Stream table.
206 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
207 * 2lvl: 128k L1 entries,
208 * 256 lazy entries per table (each table covers a PCI bus)
210 #define STRTAB_L1_SZ_SHIFT 20
211 #define STRTAB_SPLIT 8
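/*
 * With STRTAB_SPLIT == 8, a StreamID is carved up so that
 * SID[sid_bits-1:8] indexes the level-1 descriptor array and SID[7:0]
 * selects one of the 256 STEs in the lazily allocated level-2 table
 * described above.
 */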
213 #define STRTAB_L1_DESC_DWORDS 1
214 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
215 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
217 #define STRTAB_STE_DWORDS 8
218 #define STRTAB_STE_0_V (1UL << 0)
219 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
220 #define STRTAB_STE_0_CFG_ABORT 0
221 #define STRTAB_STE_0_CFG_BYPASS 4
222 #define STRTAB_STE_0_CFG_S1_TRANS 5
223 #define STRTAB_STE_0_CFG_S2_TRANS 6
225 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
226 #define STRTAB_STE_0_S1FMT_LINEAR 0
227 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
228 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
230 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
231 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
232 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
233 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
234 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
235 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
236 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
238 #define STRTAB_STE_1_S1STALLD (1UL << 27)
240 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
241 #define STRTAB_STE_1_EATS_ABT 0UL
242 #define STRTAB_STE_1_EATS_TRANS 1UL
243 #define STRTAB_STE_1_EATS_S1CHK 2UL
245 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
246 #define STRTAB_STE_1_STRW_NSEL1 0UL
247 #define STRTAB_STE_1_STRW_EL2 2UL
249 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
250 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
252 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
253 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
254 #define STRTAB_STE_2_S2AA64 (1UL << 51)
255 #define STRTAB_STE_2_S2ENDI (1UL << 52)
256 #define STRTAB_STE_2_S2PTW (1UL << 54)
257 #define STRTAB_STE_2_S2R (1UL << 58)
259 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
261 /* Context descriptor (stage-1 only) */
262 #define CTXDESC_CD_DWORDS 8
263 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
264 #define ARM64_TCR_T0SZ GENMASK_ULL(5, 0)
265 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
266 #define ARM64_TCR_TG0 GENMASK_ULL(15, 14)
267 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
268 #define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8)
269 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
270 #define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10)
271 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
272 #define ARM64_TCR_SH0 GENMASK_ULL(13, 12)
273 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
274 #define ARM64_TCR_EPD0 (1ULL << 7)
275 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
276 #define ARM64_TCR_EPD1 (1ULL << 23)
278 #define CTXDESC_CD_0_ENDI (1UL << 15)
279 #define CTXDESC_CD_0_V (1UL << 31)
281 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
282 #define ARM64_TCR_IPS GENMASK_ULL(34, 32)
283 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
284 #define ARM64_TCR_TBI0 (1ULL << 37)
286 #define CTXDESC_CD_0_AA64 (1UL << 41)
287 #define CTXDESC_CD_0_S (1UL << 44)
288 #define CTXDESC_CD_0_R (1UL << 45)
289 #define CTXDESC_CD_0_A (1UL << 46)
290 #define CTXDESC_CD_0_ASET (1UL << 47)
291 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
293 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
295 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
296 #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
297 FIELD_GET(ARM64_TCR_##fld, tcr))
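/*
 * For example, TG0 lives in TCR[15:14] on the CPU side but in CD[7:6],
 * so ARM_SMMU_TCR2CD(tcr, TG0) extracts bits [15:14] and re-deposits
 * them at [7:6]; fields such as T0SZ, IRGN0, ORGN0 and SH0 occupy the
 * same positions in both formats and are simply copied through.
 */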
299 /* Command queue */
300 #define CMDQ_ENT_DWORDS 2
301 #define CMDQ_MAX_SZ_SHIFT 8
303 #define CMDQ_CONS_ERR GENMASK(30, 24)
304 #define CMDQ_ERR_CERROR_NONE_IDX 0
305 #define CMDQ_ERR_CERROR_ILL_IDX 1
306 #define CMDQ_ERR_CERROR_ABT_IDX 2
308 #define CMDQ_0_OP GENMASK_ULL(7, 0)
309 #define CMDQ_0_SSV (1UL << 11)
311 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
312 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
313 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
315 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
316 #define CMDQ_CFGI_1_LEAF (1UL << 0)
317 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
319 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
320 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
321 #define CMDQ_TLBI_1_LEAF (1UL << 0)
322 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
323 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
325 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
326 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
327 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
328 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
330 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
331 #define CMDQ_SYNC_0_CS_NONE 0
332 #define CMDQ_SYNC_0_CS_IRQ 1
333 #define CMDQ_SYNC_0_CS_SEV 2
334 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
335 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
336 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
337 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
339 /* Event queue */
340 #define EVTQ_ENT_DWORDS 4
341 #define EVTQ_MAX_SZ_SHIFT 7
343 #define EVTQ_0_ID GENMASK_ULL(7, 0)
345 /* PRI queue */
346 #define PRIQ_ENT_DWORDS 2
347 #define PRIQ_MAX_SZ_SHIFT 8
349 #define PRIQ_0_SID GENMASK_ULL(31, 0)
350 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
351 #define PRIQ_0_PERM_PRIV (1UL << 58)
352 #define PRIQ_0_PERM_EXEC (1UL << 59)
353 #define PRIQ_0_PERM_READ (1UL << 60)
354 #define PRIQ_0_PERM_WRITE (1UL << 61)
355 #define PRIQ_0_PRG_LAST (1UL << 62)
356 #define PRIQ_0_SSID_V (1UL << 63)
358 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
359 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
361 /* High-level queue structures */
362 #define ARM_SMMU_POLL_TIMEOUT_US 100
363 #define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
364 #define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
366 #define MSI_IOVA_BASE 0x8000000
367 #define MSI_IOVA_LENGTH 0x100000
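/*
 * This IOVA window is advertised to the IOMMU core by
 * arm_smmu_get_resv_regions() as the software MSI region
 * (IOMMU_RESV_SW_MSI), so MSI doorbells can be remapped when
 * translation is enabled.
 */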
369 static bool disable_bypass;
370 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
371 MODULE_PARM_DESC(disable_bypass,
372 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
374 enum pri_resp {
375 PRI_RESP_DENY = 0,
376 PRI_RESP_FAIL = 1,
377 PRI_RESP_SUCC = 2,
380 enum arm_smmu_msi_index {
381 EVTQ_MSI_INDEX,
382 GERROR_MSI_INDEX,
383 PRIQ_MSI_INDEX,
384 ARM_SMMU_MAX_MSIS,
387 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
388 [EVTQ_MSI_INDEX] = {
389 ARM_SMMU_EVTQ_IRQ_CFG0,
390 ARM_SMMU_EVTQ_IRQ_CFG1,
391 ARM_SMMU_EVTQ_IRQ_CFG2,
393 [GERROR_MSI_INDEX] = {
394 ARM_SMMU_GERROR_IRQ_CFG0,
395 ARM_SMMU_GERROR_IRQ_CFG1,
396 ARM_SMMU_GERROR_IRQ_CFG2,
398 [PRIQ_MSI_INDEX] = {
399 ARM_SMMU_PRIQ_IRQ_CFG0,
400 ARM_SMMU_PRIQ_IRQ_CFG1,
401 ARM_SMMU_PRIQ_IRQ_CFG2,
405 struct arm_smmu_cmdq_ent {
406 /* Common fields */
407 u8 opcode;
408 bool substream_valid;
410 /* Command-specific fields */
411 union {
412 #define CMDQ_OP_PREFETCH_CFG 0x1
413 struct {
414 u32 sid;
415 u8 size;
416 u64 addr;
417 } prefetch;
419 #define CMDQ_OP_CFGI_STE 0x3
420 #define CMDQ_OP_CFGI_ALL 0x4
421 struct {
422 u32 sid;
423 union {
424 bool leaf;
425 u8 span;
427 } cfgi;
429 #define CMDQ_OP_TLBI_NH_ASID 0x11
430 #define CMDQ_OP_TLBI_NH_VA 0x12
431 #define CMDQ_OP_TLBI_EL2_ALL 0x20
432 #define CMDQ_OP_TLBI_S12_VMALL 0x28
433 #define CMDQ_OP_TLBI_S2_IPA 0x2a
434 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
435 struct {
436 u16 asid;
437 u16 vmid;
438 bool leaf;
439 u64 addr;
440 } tlbi;
442 #define CMDQ_OP_PRI_RESP 0x41
443 struct {
444 u32 sid;
445 u32 ssid;
446 u16 grpid;
447 enum pri_resp resp;
448 } pri;
450 #define CMDQ_OP_CMD_SYNC 0x46
451 struct {
452 u32 msidata;
453 u64 msiaddr;
454 } sync;
458 struct arm_smmu_queue {
459 int irq; /* Wired interrupt */
461 __le64 *base;
462 dma_addr_t base_dma;
463 u64 q_base;
465 size_t ent_dwords;
466 u32 max_n_shift;
467 u32 prod;
468 u32 cons;
470 u32 __iomem *prod_reg;
471 u32 __iomem *cons_reg;
474 struct arm_smmu_cmdq {
475 struct arm_smmu_queue q;
476 spinlock_t lock;
479 struct arm_smmu_evtq {
480 struct arm_smmu_queue q;
481 u32 max_stalls;
484 struct arm_smmu_priq {
485 struct arm_smmu_queue q;
488 /* High-level stream table and context descriptor structures */
489 struct arm_smmu_strtab_l1_desc {
490 u8 span;
492 __le64 *l2ptr;
493 dma_addr_t l2ptr_dma;
496 struct arm_smmu_s1_cfg {
497 __le64 *cdptr;
498 dma_addr_t cdptr_dma;
500 struct arm_smmu_ctx_desc {
501 u16 asid;
502 u64 ttbr;
503 u64 tcr;
504 u64 mair;
505 } cd;
508 struct arm_smmu_s2_cfg {
509 u16 vmid;
510 u64 vttbr;
511 u64 vtcr;
514 struct arm_smmu_strtab_ent {
516 * An STE is "assigned" if the master emitting the corresponding SID
517 * is attached to a domain. The behaviour of an unassigned STE is
518 * determined by the disable_bypass parameter, whereas an assigned
519 * STE behaves according to s1_cfg/s2_cfg, which themselves are
520 * configured according to the domain type.
522 bool assigned;
523 struct arm_smmu_s1_cfg *s1_cfg;
524 struct arm_smmu_s2_cfg *s2_cfg;
527 struct arm_smmu_strtab_cfg {
528 __le64 *strtab;
529 dma_addr_t strtab_dma;
530 struct arm_smmu_strtab_l1_desc *l1_desc;
531 unsigned int num_l1_ents;
533 u64 strtab_base;
534 u32 strtab_base_cfg;
537 /* An SMMUv3 instance */
538 struct arm_smmu_device {
539 struct device *dev;
540 void __iomem *base;
542 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
543 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
544 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
545 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
546 #define ARM_SMMU_FEAT_PRI (1 << 4)
547 #define ARM_SMMU_FEAT_ATS (1 << 5)
548 #define ARM_SMMU_FEAT_SEV (1 << 6)
549 #define ARM_SMMU_FEAT_MSI (1 << 7)
550 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
551 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
552 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
553 #define ARM_SMMU_FEAT_STALLS (1 << 11)
554 #define ARM_SMMU_FEAT_HYP (1 << 12)
555 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
556 #define ARM_SMMU_FEAT_VAX (1 << 14)
557 u32 features;
559 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
560 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
561 u32 options;
563 struct arm_smmu_cmdq cmdq;
564 struct arm_smmu_evtq evtq;
565 struct arm_smmu_priq priq;
567 int gerr_irq;
568 int combined_irq;
569 atomic_t sync_nr;
571 unsigned long ias; /* IPA */
572 unsigned long oas; /* PA */
573 unsigned long pgsize_bitmap;
575 #define ARM_SMMU_MAX_ASIDS (1 << 16)
576 unsigned int asid_bits;
577 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
579 #define ARM_SMMU_MAX_VMIDS (1 << 16)
580 unsigned int vmid_bits;
581 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
583 unsigned int ssid_bits;
584 unsigned int sid_bits;
586 struct arm_smmu_strtab_cfg strtab_cfg;
588 u32 sync_count;
590 /* IOMMU core code handle */
591 struct iommu_device iommu;
594 /* SMMU private data for each master */
595 struct arm_smmu_master_data {
596 struct arm_smmu_device *smmu;
597 struct arm_smmu_strtab_ent ste;
600 /* SMMU private data for an IOMMU domain */
601 enum arm_smmu_domain_stage {
602 ARM_SMMU_DOMAIN_S1 = 0,
603 ARM_SMMU_DOMAIN_S2,
604 ARM_SMMU_DOMAIN_NESTED,
605 ARM_SMMU_DOMAIN_BYPASS,
608 struct arm_smmu_domain {
609 struct arm_smmu_device *smmu;
610 struct mutex init_mutex; /* Protects smmu pointer */
612 struct io_pgtable_ops *pgtbl_ops;
614 enum arm_smmu_domain_stage stage;
615 union {
616 struct arm_smmu_s1_cfg s1_cfg;
617 struct arm_smmu_s2_cfg s2_cfg;
620 struct iommu_domain domain;
623 struct arm_smmu_option_prop {
624 u32 opt;
625 const char *prop;
628 static struct arm_smmu_option_prop arm_smmu_options[] = {
629 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
630 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
631 { 0, NULL},
634 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
635 struct arm_smmu_device *smmu)
637 if ((offset > SZ_64K) &&
638 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
639 offset -= SZ_64K;
641 return smmu->base + offset;
644 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
646 return container_of(dom, struct arm_smmu_domain, domain);
649 static void parse_driver_options(struct arm_smmu_device *smmu)
651 int i = 0;
653 do {
654 if (of_property_read_bool(smmu->dev->of_node,
655 arm_smmu_options[i].prop)) {
656 smmu->options |= arm_smmu_options[i].opt;
657 dev_notice(smmu->dev, "option %s\n",
658 arm_smmu_options[i].prop);
660 } while (arm_smmu_options[++i].opt);
663 /* Low-level queue manipulation functions */
664 static bool queue_full(struct arm_smmu_queue *q)
666 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
667 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
670 static bool queue_empty(struct arm_smmu_queue *q)
672 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
673 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
676 static void queue_sync_cons(struct arm_smmu_queue *q)
678 q->cons = readl_relaxed(q->cons_reg);
681 static void queue_inc_cons(struct arm_smmu_queue *q)
683 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
685 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
686 writel(q->cons, q->cons_reg);
689 static int queue_sync_prod(struct arm_smmu_queue *q)
691 int ret = 0;
692 u32 prod = readl_relaxed(q->prod_reg);
694 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
695 ret = -EOVERFLOW;
697 q->prod = prod;
698 return ret;
701 static void queue_inc_prod(struct arm_smmu_queue *q)
703 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
705 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
706 writel(q->prod, q->prod_reg);
710 * Wait for the SMMU to consume items. If sync is true, wait until the queue
711 * is empty. Otherwise, wait until there is at least one free slot.
713 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
715 ktime_t timeout;
716 unsigned int delay = 1, spin_cnt = 0;
718 /* Wait longer if it's a CMD_SYNC */
719 timeout = ktime_add_us(ktime_get(), sync ?
720 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
721 ARM_SMMU_POLL_TIMEOUT_US);
723 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
724 if (ktime_compare(ktime_get(), timeout) > 0)
725 return -ETIMEDOUT;
727 if (wfe) {
728 wfe();
729 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
730 cpu_relax();
731 continue;
732 } else {
733 udelay(delay);
734 delay *= 2;
735 spin_cnt = 0;
739 return 0;
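/*
 * Note the backoff strategy above: wfe() when the SMMU supports SEV,
 * otherwise spin with cpu_relax() for up to
 * ARM_SMMU_CMDQ_SYNC_SPIN_COUNT iterations before dropping into an
 * exponentially growing udelay().
 */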
742 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
744 int i;
746 for (i = 0; i < n_dwords; ++i)
747 *dst++ = cpu_to_le64(*src++);
750 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
752 if (queue_full(q))
753 return -ENOSPC;
755 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
756 queue_inc_prod(q);
757 return 0;
760 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
762 int i;
764 for (i = 0; i < n_dwords; ++i)
765 *dst++ = le64_to_cpu(*src++);
768 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
770 if (queue_empty(q))
771 return -EAGAIN;
773 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
774 queue_inc_cons(q);
775 return 0;
778 /* High-level queue accessors */
779 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
781 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
782 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
784 switch (ent->opcode) {
785 case CMDQ_OP_TLBI_EL2_ALL:
786 case CMDQ_OP_TLBI_NSNH_ALL:
787 break;
788 case CMDQ_OP_PREFETCH_CFG:
789 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
790 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
791 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
792 break;
793 case CMDQ_OP_CFGI_STE:
794 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
795 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
796 break;
797 case CMDQ_OP_CFGI_ALL:
798 /* Cover the entire SID range */
799 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
800 break;
801 case CMDQ_OP_TLBI_NH_VA:
802 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
803 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
804 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
805 break;
806 case CMDQ_OP_TLBI_S2_IPA:
807 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
808 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
809 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
810 break;
811 case CMDQ_OP_TLBI_NH_ASID:
812 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
813 /* Fallthrough */
814 case CMDQ_OP_TLBI_S12_VMALL:
815 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
816 break;
817 case CMDQ_OP_PRI_RESP:
818 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
819 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
820 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
821 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
822 switch (ent->pri.resp) {
823 case PRI_RESP_DENY:
824 case PRI_RESP_FAIL:
825 case PRI_RESP_SUCC:
826 break;
827 default:
828 return -EINVAL;
830 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
831 break;
832 case CMDQ_OP_CMD_SYNC:
833 if (ent->sync.msiaddr)
834 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
835 else
836 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
837 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
838 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
839 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
840 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
841 break;
842 default:
843 return -ENOENT;
846 return 0;
849 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
851 static const char *cerror_str[] = {
852 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
853 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
854 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
857 int i;
858 u64 cmd[CMDQ_ENT_DWORDS];
859 struct arm_smmu_queue *q = &smmu->cmdq.q;
860 u32 cons = readl_relaxed(q->cons_reg);
861 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
862 struct arm_smmu_cmdq_ent cmd_sync = {
863 .opcode = CMDQ_OP_CMD_SYNC,
866 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
867 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
869 switch (idx) {
870 case CMDQ_ERR_CERROR_ABT_IDX:
871 dev_err(smmu->dev, "retrying command fetch\n");
872 case CMDQ_ERR_CERROR_NONE_IDX:
873 return;
874 case CMDQ_ERR_CERROR_ILL_IDX:
875 /* Fallthrough */
876 default:
877 break;
881 * We may have concurrent producers, so we need to be careful
882 * not to touch any of the shadow cmdq state.
884 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
885 dev_err(smmu->dev, "skipping command in error state:\n");
886 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
887 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
889 /* Convert the erroneous command into a CMD_SYNC */
890 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
891 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
892 return;
895 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
898 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
900 struct arm_smmu_queue *q = &smmu->cmdq.q;
901 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
903 while (queue_insert_raw(q, cmd) == -ENOSPC) {
904 if (queue_poll_cons(q, false, wfe))
905 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
909 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
910 struct arm_smmu_cmdq_ent *ent)
912 u64 cmd[CMDQ_ENT_DWORDS];
913 unsigned long flags;
915 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
916 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
917 ent->opcode);
918 return;
921 spin_lock_irqsave(&smmu->cmdq.lock, flags);
922 arm_smmu_cmdq_insert_cmd(smmu, cmd);
923 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
927 * The difference between val and sync_idx is bounded by the maximum size of
928 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
930 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
932 ktime_t timeout;
933 u32 val;
935 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
936 val = smp_cond_load_acquire(&smmu->sync_count,
937 (int)(VAL - sync_idx) >= 0 ||
938 !ktime_before(ktime_get(), timeout));
940 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
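/*
 * Worked example of the wrap-safe comparison above: if sync_count has
 * wrapped round to 0x00000003 while we are waiting on sync_idx
 * 0xfffffffe, then (int)(0x00000003 - 0xfffffffe) == 5 >= 0, so the
 * CMD_SYNC is still correctly treated as complete.
 */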
943 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
945 u64 cmd[CMDQ_ENT_DWORDS];
946 unsigned long flags;
947 struct arm_smmu_cmdq_ent ent = {
948 .opcode = CMDQ_OP_CMD_SYNC,
949 .sync = {
950 .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
951 .msiaddr = virt_to_phys(&smmu->sync_count),
955 arm_smmu_cmdq_build_cmd(cmd, &ent);
957 spin_lock_irqsave(&smmu->cmdq.lock, flags);
958 arm_smmu_cmdq_insert_cmd(smmu, cmd);
959 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
961 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
964 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
966 u64 cmd[CMDQ_ENT_DWORDS];
967 unsigned long flags;
968 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
969 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
970 int ret;
972 arm_smmu_cmdq_build_cmd(cmd, &ent);
974 spin_lock_irqsave(&smmu->cmdq.lock, flags);
975 arm_smmu_cmdq_insert_cmd(smmu, cmd);
976 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
977 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
979 return ret;
982 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
984 int ret;
985 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
986 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
988 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
989 : __arm_smmu_cmdq_issue_sync(smmu);
990 if (ret)
991 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
994 /* Context descriptor manipulation functions */
995 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
997 u64 val = 0;
999 /* Repack the TCR. Just care about TTBR0 for now */
1000 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1001 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1002 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1003 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1004 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1005 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1006 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1007 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1008 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1010 return val;
1013 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1014 struct arm_smmu_s1_cfg *cfg)
1016 u64 val;
1019 * We don't need to issue any invalidation here, as we'll invalidate
1020 * the STE when installing the new entry anyway.
1022 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1023 #ifdef __BIG_ENDIAN
1024 CTXDESC_CD_0_ENDI |
1025 #endif
1026 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1027 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1028 CTXDESC_CD_0_V;
1030 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1031 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1032 val |= CTXDESC_CD_0_S;
1034 cfg->cdptr[0] = cpu_to_le64(val);
1036 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1037 cfg->cdptr[1] = cpu_to_le64(val);
1039 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1042 /* Stream table manipulation functions */
1043 static void
1044 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1046 u64 val = 0;
1048 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1049 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1051 *dst = cpu_to_le64(val);
1054 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1056 struct arm_smmu_cmdq_ent cmd = {
1057 .opcode = CMDQ_OP_CFGI_STE,
1058 .cfgi = {
1059 .sid = sid,
1060 .leaf = true,
1064 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1065 arm_smmu_cmdq_issue_sync(smmu);
1068 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
1069 __le64 *dst, struct arm_smmu_strtab_ent *ste)
1072 * This is hideously complicated, but we only really care about
1073 * three cases at the moment:
1075 * 1. Invalid (all zero) -> bypass/fault (init)
1076 * 2. Bypass/fault -> translation/bypass (attach)
1077 * 3. Translation/bypass -> bypass/fault (detach)
1079 * Given that we can't update the STE atomically and the SMMU
1080 * doesn't read the thing in a defined order, that leaves us
1081 * with the following maintenance requirements:
1083 * 1. Update Config, return (init time STEs aren't live)
1084 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1085 * 3. Update Config, sync
1087 u64 val = le64_to_cpu(dst[0]);
1088 bool ste_live = false;
1089 struct arm_smmu_cmdq_ent prefetch_cmd = {
1090 .opcode = CMDQ_OP_PREFETCH_CFG,
1091 .prefetch = {
1092 .sid = sid,
1096 if (val & STRTAB_STE_0_V) {
1097 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1098 case STRTAB_STE_0_CFG_BYPASS:
1099 break;
1100 case STRTAB_STE_0_CFG_S1_TRANS:
1101 case STRTAB_STE_0_CFG_S2_TRANS:
1102 ste_live = true;
1103 break;
1104 case STRTAB_STE_0_CFG_ABORT:
1105 if (disable_bypass)
1106 break;
1107 default:
1108 BUG(); /* STE corruption */
1112 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1113 val = STRTAB_STE_0_V;
1115 /* Bypass/fault */
1116 if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
1117 if (!ste->assigned && disable_bypass)
1118 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1119 else
1120 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1122 dst[0] = cpu_to_le64(val);
1123 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1124 STRTAB_STE_1_SHCFG_INCOMING));
1125 dst[2] = 0; /* Nuke the VMID */
1127 * The SMMU can perform negative caching, so we must sync
1128 * the STE regardless of whether the old value was live.
1130 if (smmu)
1131 arm_smmu_sync_ste_for_sid(smmu, sid);
1132 return;
1135 if (ste->s1_cfg) {
1136 BUG_ON(ste_live);
1137 dst[1] = cpu_to_le64(
1138 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1139 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1140 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1141 #ifdef CONFIG_PCI_ATS
1142 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
1143 #endif
1144 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1146 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1147 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1148 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1150 val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1151 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
1154 if (ste->s2_cfg) {
1155 BUG_ON(ste_live);
1156 dst[2] = cpu_to_le64(
1157 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
1158 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
1159 #ifdef __BIG_ENDIAN
1160 STRTAB_STE_2_S2ENDI |
1161 #endif
1162 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1163 STRTAB_STE_2_S2R);
1165 dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1167 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1170 arm_smmu_sync_ste_for_sid(smmu, sid);
1171 dst[0] = cpu_to_le64(val);
1172 arm_smmu_sync_ste_for_sid(smmu, sid);
1174 /* It's likely that we'll want to use the new STE soon */
1175 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1176 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1179 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1181 unsigned int i;
1182 struct arm_smmu_strtab_ent ste = { .assigned = false };
1184 for (i = 0; i < nent; ++i) {
1185 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1186 strtab += STRTAB_STE_DWORDS;
1190 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1192 size_t size;
1193 void *strtab;
1194 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1195 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1197 if (desc->l2ptr)
1198 return 0;
1200 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1201 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
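/*
 * The SPAN field encodes the number of StreamIDs served by the level-2
 * table as 2^(span - 1), so STRTAB_SPLIT + 1 below advertises a table
 * of 1 << STRTAB_SPLIT (256) STEs.
 */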
1203 desc->span = STRTAB_SPLIT + 1;
1204 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1205 GFP_KERNEL | __GFP_ZERO);
1206 if (!desc->l2ptr) {
1207 dev_err(smmu->dev,
1208 "failed to allocate l2 stream table for SID %u\n",
1209 sid);
1210 return -ENOMEM;
1213 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1214 arm_smmu_write_strtab_l1_desc(strtab, desc);
1215 return 0;
1218 /* IRQ and event handlers */
1219 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1221 int i;
1222 struct arm_smmu_device *smmu = dev;
1223 struct arm_smmu_queue *q = &smmu->evtq.q;
1224 u64 evt[EVTQ_ENT_DWORDS];
1226 do {
1227 while (!queue_remove_raw(q, evt)) {
1228 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1230 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1231 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1232 dev_info(smmu->dev, "\t0x%016llx\n",
1233 (unsigned long long)evt[i]);
1238 * Not much we can do on overflow, so scream and pretend we're
1239 * trying harder.
1241 if (queue_sync_prod(q) == -EOVERFLOW)
1242 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1243 } while (!queue_empty(q));
1245 /* Sync our overflow flag, as we believe we're up to speed */
1246 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1247 return IRQ_HANDLED;
1250 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1252 u32 sid, ssid;
1253 u16 grpid;
1254 bool ssv, last;
1256 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1257 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1258 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1259 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1260 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1262 dev_info(smmu->dev, "unexpected PRI request received:\n");
1263 dev_info(smmu->dev,
1264 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1265 sid, ssid, grpid, last ? "L" : "",
1266 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1267 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1268 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1269 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1270 evt[1] & PRIQ_1_ADDR_MASK);
1272 if (last) {
1273 struct arm_smmu_cmdq_ent cmd = {
1274 .opcode = CMDQ_OP_PRI_RESP,
1275 .substream_valid = ssv,
1276 .pri = {
1277 .sid = sid,
1278 .ssid = ssid,
1279 .grpid = grpid,
1280 .resp = PRI_RESP_DENY,
1284 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1288 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1290 struct arm_smmu_device *smmu = dev;
1291 struct arm_smmu_queue *q = &smmu->priq.q;
1292 u64 evt[PRIQ_ENT_DWORDS];
1294 do {
1295 while (!queue_remove_raw(q, evt))
1296 arm_smmu_handle_ppr(smmu, evt);
1298 if (queue_sync_prod(q) == -EOVERFLOW)
1299 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1300 } while (!queue_empty(q));
1302 /* Sync our overflow flag, as we believe we're up to speed */
1303 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1304 return IRQ_HANDLED;
1307 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1309 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1311 u32 gerror, gerrorn, active;
1312 struct arm_smmu_device *smmu = dev;
1314 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1315 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1317 active = gerror ^ gerrorn;
1318 if (!(active & GERROR_ERR_MASK))
1319 return IRQ_NONE; /* No errors pending */
1321 dev_warn(smmu->dev,
1322 "unexpected global error reported (0x%08x), this could be serious\n",
1323 active);
1325 if (active & GERROR_SFM_ERR) {
1326 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1327 arm_smmu_device_disable(smmu);
1330 if (active & GERROR_MSI_GERROR_ABT_ERR)
1331 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1333 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1334 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1336 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1337 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1339 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1340 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1342 if (active & GERROR_PRIQ_ABT_ERR)
1343 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1345 if (active & GERROR_EVTQ_ABT_ERR)
1346 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1348 if (active & GERROR_CMDQ_ERR)
1349 arm_smmu_cmdq_skip_err(smmu);
1351 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1352 return IRQ_HANDLED;
1355 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1357 struct arm_smmu_device *smmu = dev;
1359 arm_smmu_evtq_thread(irq, dev);
1360 if (smmu->features & ARM_SMMU_FEAT_PRI)
1361 arm_smmu_priq_thread(irq, dev);
1363 return IRQ_HANDLED;
1366 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1368 arm_smmu_gerror_handler(irq, dev);
1369 return IRQ_WAKE_THREAD;
1372 /* IO_PGTABLE API */
1373 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
1375 arm_smmu_cmdq_issue_sync(smmu);
1378 static void arm_smmu_tlb_sync(void *cookie)
1380 struct arm_smmu_domain *smmu_domain = cookie;
1381 __arm_smmu_tlb_sync(smmu_domain->smmu);
1384 static void arm_smmu_tlb_inv_context(void *cookie)
1386 struct arm_smmu_domain *smmu_domain = cookie;
1387 struct arm_smmu_device *smmu = smmu_domain->smmu;
1388 struct arm_smmu_cmdq_ent cmd;
1390 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1391 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1392 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1393 cmd.tlbi.vmid = 0;
1394 } else {
1395 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1396 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1399 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1400 __arm_smmu_tlb_sync(smmu);
1403 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1404 size_t granule, bool leaf, void *cookie)
1406 struct arm_smmu_domain *smmu_domain = cookie;
1407 struct arm_smmu_device *smmu = smmu_domain->smmu;
1408 struct arm_smmu_cmdq_ent cmd = {
1409 .tlbi = {
1410 .leaf = leaf,
1411 .addr = iova,
1415 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1416 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1417 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1418 } else {
1419 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1420 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1423 do {
1424 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1425 cmd.tlbi.addr += granule;
1426 } while (size -= granule);
1429 static const struct iommu_gather_ops arm_smmu_gather_ops = {
1430 .tlb_flush_all = arm_smmu_tlb_inv_context,
1431 .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
1432 .tlb_sync = arm_smmu_tlb_sync,
1435 /* IOMMU API */
1436 static bool arm_smmu_capable(enum iommu_cap cap)
1438 switch (cap) {
1439 case IOMMU_CAP_CACHE_COHERENCY:
1440 return true;
1441 case IOMMU_CAP_NOEXEC:
1442 return true;
1443 default:
1444 return false;
1448 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1450 struct arm_smmu_domain *smmu_domain;
1452 if (type != IOMMU_DOMAIN_UNMANAGED &&
1453 type != IOMMU_DOMAIN_DMA &&
1454 type != IOMMU_DOMAIN_IDENTITY)
1455 return NULL;
1458 * Allocate the domain and initialise some of its data structures.
1459 * We can't really do anything meaningful until we've added a
1460 * master.
1462 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1463 if (!smmu_domain)
1464 return NULL;
1466 if (type == IOMMU_DOMAIN_DMA &&
1467 iommu_get_dma_cookie(&smmu_domain->domain)) {
1468 kfree(smmu_domain);
1469 return NULL;
1472 mutex_init(&smmu_domain->init_mutex);
1473 return &smmu_domain->domain;
1476 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1478 int idx, size = 1 << span;
1480 do {
1481 idx = find_first_zero_bit(map, size);
1482 if (idx == size)
1483 return -ENOSPC;
1484 } while (test_and_set_bit(idx, map));
1486 return idx;
1489 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
1491 clear_bit(idx, map);
1494 static void arm_smmu_domain_free(struct iommu_domain *domain)
1496 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1497 struct arm_smmu_device *smmu = smmu_domain->smmu;
1499 iommu_put_dma_cookie(domain);
1500 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1502 /* Free the CD and ASID, if we allocated them */
1503 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1504 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1506 if (cfg->cdptr) {
1507 dmam_free_coherent(smmu_domain->smmu->dev,
1508 CTXDESC_CD_DWORDS << 3,
1509 cfg->cdptr,
1510 cfg->cdptr_dma);
1512 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1514 } else {
1515 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1516 if (cfg->vmid)
1517 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1520 kfree(smmu_domain);
1523 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1524 struct io_pgtable_cfg *pgtbl_cfg)
1526 int ret;
1527 int asid;
1528 struct arm_smmu_device *smmu = smmu_domain->smmu;
1529 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1531 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1532 if (asid < 0)
1533 return asid;
1535 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1536 &cfg->cdptr_dma,
1537 GFP_KERNEL | __GFP_ZERO);
1538 if (!cfg->cdptr) {
1539 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1540 ret = -ENOMEM;
1541 goto out_free_asid;
1544 cfg->cd.asid = (u16)asid;
1545 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1546 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1547 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1548 return 0;
1550 out_free_asid:
1551 arm_smmu_bitmap_free(smmu->asid_map, asid);
1552 return ret;
1555 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1556 struct io_pgtable_cfg *pgtbl_cfg)
1558 int vmid;
1559 struct arm_smmu_device *smmu = smmu_domain->smmu;
1560 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1562 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1563 if (vmid < 0)
1564 return vmid;
1566 cfg->vmid = (u16)vmid;
1567 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1568 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1569 return 0;
1572 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1574 int ret;
1575 unsigned long ias, oas;
1576 enum io_pgtable_fmt fmt;
1577 struct io_pgtable_cfg pgtbl_cfg;
1578 struct io_pgtable_ops *pgtbl_ops;
1579 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1580 struct io_pgtable_cfg *);
1581 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1582 struct arm_smmu_device *smmu = smmu_domain->smmu;
1584 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1585 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1586 return 0;
1589 /* Restrict the stage to what we can actually support */
1590 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1591 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1592 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1593 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1595 switch (smmu_domain->stage) {
1596 case ARM_SMMU_DOMAIN_S1:
1597 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1598 ias = min_t(unsigned long, ias, VA_BITS);
1599 oas = smmu->ias;
1600 fmt = ARM_64_LPAE_S1;
1601 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1602 break;
1603 case ARM_SMMU_DOMAIN_NESTED:
1604 case ARM_SMMU_DOMAIN_S2:
1605 ias = smmu->ias;
1606 oas = smmu->oas;
1607 fmt = ARM_64_LPAE_S2;
1608 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1609 break;
1610 default:
1611 return -EINVAL;
1614 pgtbl_cfg = (struct io_pgtable_cfg) {
1615 .pgsize_bitmap = smmu->pgsize_bitmap,
1616 .ias = ias,
1617 .oas = oas,
1618 .tlb = &arm_smmu_gather_ops,
1619 .iommu_dev = smmu->dev,
1622 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1623 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1625 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1626 if (!pgtbl_ops)
1627 return -ENOMEM;
1629 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1630 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1631 domain->geometry.force_aperture = true;
1633 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1634 if (ret < 0) {
1635 free_io_pgtable_ops(pgtbl_ops);
1636 return ret;
1639 smmu_domain->pgtbl_ops = pgtbl_ops;
1640 return 0;
1643 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1645 __le64 *step;
1646 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1648 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1649 struct arm_smmu_strtab_l1_desc *l1_desc;
1650 int idx;
1652 /* Two-level walk */
1653 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1654 l1_desc = &cfg->l1_desc[idx];
1655 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1656 step = &l1_desc->l2ptr[idx];
1657 } else {
1658 /* Simple linear lookup */
1659 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1662 return step;
1665 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1667 int i, j;
1668 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1669 struct arm_smmu_device *smmu = master->smmu;
1671 for (i = 0; i < fwspec->num_ids; ++i) {
1672 u32 sid = fwspec->ids[i];
1673 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1675 /* Bridged PCI devices may end up with duplicated IDs */
1676 for (j = 0; j < i; j++)
1677 if (fwspec->ids[j] == sid)
1678 break;
1679 if (j < i)
1680 continue;
1682 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1686 static void arm_smmu_detach_dev(struct device *dev)
1688 struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1690 master->ste.assigned = false;
1691 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1694 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1696 int ret = 0;
1697 struct arm_smmu_device *smmu;
1698 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1699 struct arm_smmu_master_data *master;
1700 struct arm_smmu_strtab_ent *ste;
1702 if (!dev->iommu_fwspec)
1703 return -ENOENT;
1705 master = dev->iommu_fwspec->iommu_priv;
1706 smmu = master->smmu;
1707 ste = &master->ste;
1709 /* Already attached to a different domain? */
1710 if (ste->assigned)
1711 arm_smmu_detach_dev(dev);
1713 mutex_lock(&smmu_domain->init_mutex);
1715 if (!smmu_domain->smmu) {
1716 smmu_domain->smmu = smmu;
1717 ret = arm_smmu_domain_finalise(domain);
1718 if (ret) {
1719 smmu_domain->smmu = NULL;
1720 goto out_unlock;
1722 } else if (smmu_domain->smmu != smmu) {
1723 dev_err(dev,
1724 "cannot attach to SMMU %s (upstream of %s)\n",
1725 dev_name(smmu_domain->smmu->dev),
1726 dev_name(smmu->dev));
1727 ret = -ENXIO;
1728 goto out_unlock;
1731 ste->assigned = true;
1733 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1734 ste->s1_cfg = NULL;
1735 ste->s2_cfg = NULL;
1736 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1737 ste->s1_cfg = &smmu_domain->s1_cfg;
1738 ste->s2_cfg = NULL;
1739 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1740 } else {
1741 ste->s1_cfg = NULL;
1742 ste->s2_cfg = &smmu_domain->s2_cfg;
1745 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1746 out_unlock:
1747 mutex_unlock(&smmu_domain->init_mutex);
1748 return ret;
1751 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1752 phys_addr_t paddr, size_t size, int prot)
1754 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1756 if (!ops)
1757 return -ENODEV;
1759 return ops->map(ops, iova, paddr, size, prot);
1762 static size_t
1763 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1765 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1767 if (!ops)
1768 return 0;
1770 return ops->unmap(ops, iova, size);
1773 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1775 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1777 if (smmu)
1778 __arm_smmu_tlb_sync(smmu);
1781 static phys_addr_t
1782 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1784 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1786 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1787 return iova;
1789 if (!ops)
1790 return 0;
1792 return ops->iova_to_phys(ops, iova);
1795 static struct platform_driver arm_smmu_driver;
1797 static int arm_smmu_match_node(struct device *dev, void *data)
1799 return dev->fwnode == data;
1802 static
1803 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1805 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1806 fwnode, arm_smmu_match_node);
1807 put_device(dev);
1808 return dev ? dev_get_drvdata(dev) : NULL;
1811 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1813 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1815 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1816 limit *= 1UL << STRTAB_SPLIT;
1818 return sid < limit;
1821 static struct iommu_ops arm_smmu_ops;
1823 static int arm_smmu_add_device(struct device *dev)
1825 int i, ret;
1826 struct arm_smmu_device *smmu;
1827 struct arm_smmu_master_data *master;
1828 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1829 struct iommu_group *group;
1831 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1832 return -ENODEV;
1834 * We _can_ actually withstand dodgy bus code re-calling add_device()
1835 * without an intervening remove_device()/of_xlate() sequence, but
1836 * we're not going to do so quietly...
1838 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1839 master = fwspec->iommu_priv;
1840 smmu = master->smmu;
1841 } else {
1842 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1843 if (!smmu)
1844 return -ENODEV;
1845 master = kzalloc(sizeof(*master), GFP_KERNEL);
1846 if (!master)
1847 return -ENOMEM;
1849 master->smmu = smmu;
1850 fwspec->iommu_priv = master;
1853 /* Check the SIDs are in range of the SMMU and our stream table */
1854 for (i = 0; i < fwspec->num_ids; i++) {
1855 u32 sid = fwspec->ids[i];
1857 if (!arm_smmu_sid_in_range(smmu, sid))
1858 return -ERANGE;
1860 /* Ensure l2 strtab is initialised */
1861 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1862 ret = arm_smmu_init_l2_strtab(smmu, sid);
1863 if (ret)
1864 return ret;
1868 group = iommu_group_get_for_dev(dev);
1869 if (!IS_ERR(group)) {
1870 iommu_group_put(group);
1871 iommu_device_link(&smmu->iommu, dev);
1874 return PTR_ERR_OR_ZERO(group);
1877 static void arm_smmu_remove_device(struct device *dev)
1879 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1880 struct arm_smmu_master_data *master;
1881 struct arm_smmu_device *smmu;
1883 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1884 return;
1886 master = fwspec->iommu_priv;
1887 smmu = master->smmu;
1888 if (master && master->ste.assigned)
1889 arm_smmu_detach_dev(dev);
1890 iommu_group_remove_device(dev);
1891 iommu_device_unlink(&smmu->iommu, dev);
1892 kfree(master);
1893 iommu_fwspec_free(dev);
1896 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1898 struct iommu_group *group;
1901 * We don't support devices sharing stream IDs other than PCI RID
1902 * aliases, since the necessary ID-to-device lookup becomes rather
1903 * impractical given a potential sparse 32-bit stream ID space.
1905 if (dev_is_pci(dev))
1906 group = pci_device_group(dev);
1907 else
1908 group = generic_device_group(dev);
1910 return group;
1913 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1914 enum iommu_attr attr, void *data)
1916 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1918 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1919 return -EINVAL;
1921 switch (attr) {
1922 case DOMAIN_ATTR_NESTING:
1923 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1924 return 0;
1925 default:
1926 return -ENODEV;
1930 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1931 enum iommu_attr attr, void *data)
1933 int ret = 0;
1934 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1936 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1937 return -EINVAL;
1939 mutex_lock(&smmu_domain->init_mutex);
1941 switch (attr) {
1942 case DOMAIN_ATTR_NESTING:
1943 if (smmu_domain->smmu) {
1944 ret = -EPERM;
1945 goto out_unlock;
1948 if (*(int *)data)
1949 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1950 else
1951 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1953 break;
1954 default:
1955 ret = -ENODEV;
1958 out_unlock:
1959 mutex_unlock(&smmu_domain->init_mutex);
1960 return ret;
1963 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1965 return iommu_fwspec_add_ids(dev, args->args, 1);
1968 static void arm_smmu_get_resv_regions(struct device *dev,
1969 struct list_head *head)
1971 struct iommu_resv_region *region;
1972 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1974 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1975 prot, IOMMU_RESV_SW_MSI);
1976 if (!region)
1977 return;
1979 list_add_tail(&region->list, head);
1981 iommu_dma_get_resv_regions(dev, head);
1984 static void arm_smmu_put_resv_regions(struct device *dev,
1985 struct list_head *head)
1987 struct iommu_resv_region *entry, *next;
1989 list_for_each_entry_safe(entry, next, head, list)
1990 kfree(entry);
1993 static struct iommu_ops arm_smmu_ops = {
1994 .capable = arm_smmu_capable,
1995 .domain_alloc = arm_smmu_domain_alloc,
1996 .domain_free = arm_smmu_domain_free,
1997 .attach_dev = arm_smmu_attach_dev,
1998 .map = arm_smmu_map,
1999 .unmap = arm_smmu_unmap,
2000 .map_sg = default_iommu_map_sg,
2001 .flush_iotlb_all = arm_smmu_iotlb_sync,
2002 .iotlb_sync = arm_smmu_iotlb_sync,
2003 .iova_to_phys = arm_smmu_iova_to_phys,
2004 .add_device = arm_smmu_add_device,
2005 .remove_device = arm_smmu_remove_device,
2006 .device_group = arm_smmu_device_group,
2007 .domain_get_attr = arm_smmu_domain_get_attr,
2008 .domain_set_attr = arm_smmu_domain_set_attr,
2009 .of_xlate = arm_smmu_of_xlate,
2010 .get_resv_regions = arm_smmu_get_resv_regions,
2011 .put_resv_regions = arm_smmu_put_resv_regions,
2012 .pgsize_bitmap = -1UL, /* Restricted during device attach */
2015 /* Probing and initialisation functions */
2016 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2017 struct arm_smmu_queue *q,
2018 unsigned long prod_off,
2019 unsigned long cons_off,
2020 size_t dwords)
2022 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2024 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2025 if (!q->base) {
2026 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2027 qsz);
2028 return -ENOMEM;
2031 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2032 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2033 q->ent_dwords = dwords;
2035 q->q_base = Q_BASE_RWA;
2036 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2037 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2039 q->prod = q->cons = 0;
2040 return 0;
2043 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2045 int ret;
2047 /* cmdq */
2048 spin_lock_init(&smmu->cmdq.lock);
2049 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2050 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2051 if (ret)
2052 return ret;
2054 /* evtq */
2055 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2056 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2057 if (ret)
2058 return ret;
2060 /* priq */
2061 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2062 return 0;
2064 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2065 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
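/*
 * For the 2-level stream table, cfg->l1_desc is a CPU-side shadow of the
 * hardware L1 table. Each descriptor is written out below in its initial,
 * empty state; the expectation is that an L2 table is only installed
 * lazily, once a stream ID within that descriptor's range is used.
 */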
2068 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2070 unsigned int i;
2071 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2072 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2073 void *strtab = smmu->strtab_cfg.strtab;
2075 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2076 if (!cfg->l1_desc) {
2077 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2078 return -ENOMEM;
2081 for (i = 0; i < cfg->num_l1_ents; ++i) {
2082 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2083 strtab += STRTAB_L1_DESC_DWORDS << 3;
2086 return 0;
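/*
 * 2-level stream table sizing: the L1 table is limited to
 * 2^(STRTAB_L1_SZ_SHIFT - log2(descriptor bytes)) entries and further
 * capped so that L1 entries * 2^STRTAB_SPLIT never exceeds 2^sid_bits.
 * For example (assuming STRTAB_SPLIT == 8), sid_bits == 16 yields
 * 2^(16 - 8) = 256 L1 descriptors, each spanning 256 stream table entries.
 */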
2089 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2091 void *strtab;
2092 u64 reg;
2093 u32 size, l1size;
2094 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2096 /* Calculate the L1 size, capped to the SIDSIZE. */
2097 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2098 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2099 cfg->num_l1_ents = 1 << size;
2101 size += STRTAB_SPLIT;
2102 if (size < smmu->sid_bits)
2103 dev_warn(smmu->dev,
2104 "2-level strtab only covers %u/%u bits of SID\n",
2105 size, smmu->sid_bits);
2107 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2108 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2109 GFP_KERNEL | __GFP_ZERO);
2110 if (!strtab) {
2111 dev_err(smmu->dev,
2112 "failed to allocate l1 stream table (%u bytes)\n",
2113 l1size);
2114 return -ENOMEM;
2116 cfg->strtab = strtab;
2118 /* Configure strtab_base_cfg for 2 levels */
2119 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2120 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2121 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2122 cfg->strtab_base_cfg = reg;
2124 return arm_smmu_init_l1_strtab(smmu);
2127 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2129 void *strtab;
2130 u64 reg;
2131 u32 size;
2132 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2134 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2135 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2136 GFP_KERNEL | __GFP_ZERO);
2137 if (!strtab) {
2138 dev_err(smmu->dev,
2139 "failed to allocate linear stream table (%u bytes)\n",
2140 size);
2141 return -ENOMEM;
2143 cfg->strtab = strtab;
2144 cfg->num_l1_ents = 1 << smmu->sid_bits;
2146 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2147 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2148 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2149 cfg->strtab_base_cfg = reg;
2151 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2152 return 0;
2155 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2157 u64 reg;
2158 int ret;
2160 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2161 ret = arm_smmu_init_strtab_2lvl(smmu);
2162 else
2163 ret = arm_smmu_init_strtab_linear(smmu);
2165 if (ret)
2166 return ret;
2168 /* Set the strtab base address */
2169 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2170 reg |= STRTAB_BASE_RA;
2171 smmu->strtab_cfg.strtab_base = reg;
2173 /* Allocate the first VMID for stage-2 bypass STEs */
2174 set_bit(0, smmu->vmid_map);
2175 return 0;
2178 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
2180 int ret;
2182 atomic_set(&smmu->sync_nr, 0);
2183 ret = arm_smmu_init_queues(smmu);
2184 if (ret)
2185 return ret;
2187 return arm_smmu_init_strtab(smmu);
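/*
 * Write/ack helper used for the CR0/CR0ACK and IRQ_CTRL/IRQ_CTRLACK
 * register pairs: write the new value, then poll the ACK register until
 * the hardware mirrors it back or ARM_SMMU_POLL_TIMEOUT_US elapses.
 */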
2190 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2191 unsigned int reg_off, unsigned int ack_off)
2193 u32 reg;
2195 writel_relaxed(val, smmu->base + reg_off);
2196 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2197 1, ARM_SMMU_POLL_TIMEOUT_US);
2200 /* GBPA is "special" */
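/*
 * "Special" because GBPA has no separate ACK register: the GBPA_UPDATE
 * bit itself is the handshake. The helper waits for UPDATE to be clear,
 * applies the set/clr masks, writes the value back with UPDATE set and
 * then waits for the hardware to clear UPDATE again.
 */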
2201 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2203 int ret;
2204 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2206 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2207 1, ARM_SMMU_POLL_TIMEOUT_US);
2208 if (ret)
2209 return ret;
2211 reg &= ~clr;
2212 reg |= set;
2213 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2214 return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2215 1, ARM_SMMU_POLL_TIMEOUT_US);
2218 static void arm_smmu_free_msis(void *data)
2220 struct device *dev = data;
2221 platform_msi_domain_free_irqs(dev);
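/*
 * MSI doorbell programming: arm_smmu_msi_cfg[] maps each platform MSI
 * index (evtq, gerror, priq) to a triple of register offsets, so the
 * composed message is written straight into the SMMU - cfg[0] takes the
 * doorbell address, cfg[1] the payload and cfg[2] the memory attributes
 * (Device-nGnRE).
 */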
2224 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2226 phys_addr_t doorbell;
2227 struct device *dev = msi_desc_to_dev(desc);
2228 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2229 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2231 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2232 doorbell &= MSI_CFG0_ADDR_MASK;
2234 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2235 writel_relaxed(msg->data, smmu->base + cfg[1]);
2236 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2239 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2241 struct msi_desc *desc;
2242 int ret, nvec = ARM_SMMU_MAX_MSIS;
2243 struct device *dev = smmu->dev;
2245 /* Clear the MSI address regs */
2246 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2247 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2249 if (smmu->features & ARM_SMMU_FEAT_PRI)
2250 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2251 else
2252 nvec--;
2254 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2255 return;
2257 if (!dev->msi_domain) {
2258 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2259 return;
2262 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2263 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2264 if (ret) {
2265 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2266 return;
2269 for_each_msi_entry(desc, dev) {
2270 switch (desc->platform.msi_index) {
2271 case EVTQ_MSI_INDEX:
2272 smmu->evtq.q.irq = desc->irq;
2273 break;
2274 case GERROR_MSI_INDEX:
2275 smmu->gerr_irq = desc->irq;
2276 break;
2277 case PRIQ_MSI_INDEX:
2278 smmu->priq.q.irq = desc->irq;
2279 break;
2280 default: /* Unknown */
2281 continue;
2285 /* Add callback to free MSIs on teardown */
2286 devm_add_action(dev, arm_smmu_free_msis, dev);
2289 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2291 int irq, ret;
2293 arm_smmu_setup_msis(smmu);
2295 /* Request interrupt lines */
2296 irq = smmu->evtq.q.irq;
2297 if (irq) {
2298 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2299 arm_smmu_evtq_thread,
2300 IRQF_ONESHOT,
2301 "arm-smmu-v3-evtq", smmu);
2302 if (ret < 0)
2303 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2304 } else {
2305 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2308 irq = smmu->gerr_irq;
2309 if (irq) {
2310 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2311 0, "arm-smmu-v3-gerror", smmu);
2312 if (ret < 0)
2313 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2314 } else {
2315 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2318 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2319 irq = smmu->priq.q.irq;
2320 if (irq) {
2321 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2322 arm_smmu_priq_thread,
2323 IRQF_ONESHOT,
2324 "arm-smmu-v3-priq",
2325 smmu);
2326 if (ret < 0)
2327 dev_warn(smmu->dev,
2328 "failed to enable priq irq\n");
2329 } else {
2330 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2335 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2337 int ret, irq;
2338 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2340 /* Disable IRQs first */
2341 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2342 ARM_SMMU_IRQ_CTRLACK);
2343 if (ret) {
2344 dev_err(smmu->dev, "failed to disable irqs\n");
2345 return ret;
2348 irq = smmu->combined_irq;
2349 if (irq) {
2350 /*
2351 * Cavium ThunderX2 implementation doesn't support unique
2352 * irq lines. Use a single irq line for all the SMMUv3 interrupts.
2353 */
2354 ret = devm_request_threaded_irq(smmu->dev, irq,
2355 arm_smmu_combined_irq_handler,
2356 arm_smmu_combined_irq_thread,
2357 IRQF_ONESHOT,
2358 "arm-smmu-v3-combined-irq", smmu);
2359 if (ret < 0)
2360 dev_warn(smmu->dev, "failed to enable combined irq\n");
2361 } else
2362 arm_smmu_setup_unique_irqs(smmu);
2364 if (smmu->features & ARM_SMMU_FEAT_PRI)
2365 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2367 /* Enable interrupt generation on the SMMU */
2368 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2369 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2370 if (ret)
2371 dev_warn(smmu->dev, "failed to enable irqs\n");
2373 return 0;
2376 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2378 int ret;
2380 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2381 if (ret)
2382 dev_err(smmu->dev, "failed to clear cr0\n");
2384 return ret;
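/*
 * Hardware reset sequence: with the SMMU disabled, program the table and
 * queue memory attributes (CR1) and CR2, install the stream table and
 * queue base registers, enable the command, event and PRI queues one at a
 * time via CR0/CR0ACK, invalidate any cached configuration and TLB
 * entries, set up interrupts, and finally either set SMMUEN or (for
 * firmware-requested bypass) leave translation off with GBPA configured
 * to pass traffic through by clearing GBPA_ABORT.
 */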
2387 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2389 int ret;
2390 u32 reg, enables;
2391 struct arm_smmu_cmdq_ent cmd;
2393 /* Clear CR0 and sync (disables SMMU and queue processing) */
2394 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2395 if (reg & CR0_SMMUEN)
2396 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2398 ret = arm_smmu_device_disable(smmu);
2399 if (ret)
2400 return ret;
2402 /* CR1 (table and queue memory attributes) */
2403 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2404 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2405 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2406 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2407 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2408 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2409 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2411 /* CR2 (private TLB maintenance, invalid-SID recording, E2H) */
2412 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2413 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2415 /* Stream table */
2416 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2417 smmu->base + ARM_SMMU_STRTAB_BASE);
2418 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2419 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2421 /* Command queue */
2422 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2423 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2424 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2426 enables = CR0_CMDQEN;
2427 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2428 ARM_SMMU_CR0ACK);
2429 if (ret) {
2430 dev_err(smmu->dev, "failed to enable command queue\n");
2431 return ret;
2434 /* Invalidate any cached configuration */
2435 cmd.opcode = CMDQ_OP_CFGI_ALL;
2436 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2437 arm_smmu_cmdq_issue_sync(smmu);
2439 /* Invalidate any stale TLB entries */
2440 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2441 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2442 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2445 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2446 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2447 arm_smmu_cmdq_issue_sync(smmu);
2449 /* Event queue */
2450 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2451 writel_relaxed(smmu->evtq.q.prod,
2452 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2453 writel_relaxed(smmu->evtq.q.cons,
2454 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2456 enables |= CR0_EVTQEN;
2457 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2458 ARM_SMMU_CR0ACK);
2459 if (ret) {
2460 dev_err(smmu->dev, "failed to enable event queue\n");
2461 return ret;
2464 /* PRI queue */
2465 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2466 writeq_relaxed(smmu->priq.q.q_base,
2467 smmu->base + ARM_SMMU_PRIQ_BASE);
2468 writel_relaxed(smmu->priq.q.prod,
2469 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2470 writel_relaxed(smmu->priq.q.cons,
2471 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2473 enables |= CR0_PRIQEN;
2474 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2475 ARM_SMMU_CR0ACK);
2476 if (ret) {
2477 dev_err(smmu->dev, "failed to enable PRI queue\n");
2478 return ret;
2482 ret = arm_smmu_setup_irqs(smmu);
2483 if (ret) {
2484 dev_err(smmu->dev, "failed to setup irqs\n");
2485 return ret;
2489 /* Enable the SMMU interface, or ensure bypass */
2490 if (!bypass || disable_bypass) {
2491 enables |= CR0_SMMUEN;
2492 } else {
2493 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2494 if (ret) {
2495 dev_err(smmu->dev, "GBPA not responding to update\n");
2496 return ret;
2499 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2500 ARM_SMMU_CR0ACK);
2501 if (ret) {
2502 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2503 return ret;
2506 return 0;
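/*
 * Read the hardware ID registers: IDR0 provides the feature flags
 * (2-level tables, stall model, endianness, S1/S2 support, MSI, ...),
 * IDR1 the queue sizes and SID/SSID widths, and IDR5 the supported page
 * sizes and output address size, which also bounds the DMA mask used for
 * table walks.
 */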
2509 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2511 u32 reg;
2512 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2514 /* IDR0 */
2515 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2517 /* 2-level structures */
2518 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2519 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2521 if (reg & IDR0_CD2L)
2522 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2524 /*
2525 * Translation table endianness.
2526 * We currently require the same endianness as the CPU, but this
2527 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2528 */
2529 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2530 case IDR0_TTENDIAN_MIXED:
2531 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2532 break;
2533 #ifdef __BIG_ENDIAN
2534 case IDR0_TTENDIAN_BE:
2535 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2536 break;
2537 #else
2538 case IDR0_TTENDIAN_LE:
2539 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2540 break;
2541 #endif
2542 default:
2543 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2544 return -ENXIO;
2547 /* Boolean feature flags */
2548 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2549 smmu->features |= ARM_SMMU_FEAT_PRI;
2551 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2552 smmu->features |= ARM_SMMU_FEAT_ATS;
2554 if (reg & IDR0_SEV)
2555 smmu->features |= ARM_SMMU_FEAT_SEV;
2557 if (reg & IDR0_MSI)
2558 smmu->features |= ARM_SMMU_FEAT_MSI;
2560 if (reg & IDR0_HYP)
2561 smmu->features |= ARM_SMMU_FEAT_HYP;
2563 /*
2564 * The coherency feature as set by FW is used in preference to the ID
2565 * register, but warn on mismatch.
2566 */
2567 if (!!(reg & IDR0_COHACC) != coherent)
2568 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2569 coherent ? "true" : "false");
2571 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2572 case IDR0_STALL_MODEL_FORCE:
2573 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2574 /* Fallthrough */
2575 case IDR0_STALL_MODEL_STALL:
2576 smmu->features |= ARM_SMMU_FEAT_STALLS;
2579 if (reg & IDR0_S1P)
2580 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2582 if (reg & IDR0_S2P)
2583 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2585 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2586 dev_err(smmu->dev, "no translation support!\n");
2587 return -ENXIO;
2590 /* We only support the AArch64 table format at present */
2591 switch (FIELD_GET(IDR0_TTF, reg)) {
2592 case IDR0_TTF_AARCH32_64:
2593 smmu->ias = 40;
2594 /* Fallthrough */
2595 case IDR0_TTF_AARCH64:
2596 break;
2597 default:
2598 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2599 return -ENXIO;
2602 /* ASID/VMID sizes */
2603 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2604 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2606 /* IDR1 */
2607 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2608 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2609 dev_err(smmu->dev, "embedded implementation not supported\n");
2610 return -ENXIO;
2613 /* Queue sizes, capped at 4k */
2614 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2615 FIELD_GET(IDR1_CMDQS, reg));
2616 if (!smmu->cmdq.q.max_n_shift) {
2617 /* Odd alignment restrictions on the base, so ignore for now */
2618 dev_err(smmu->dev, "unit-length command queue not supported\n");
2619 return -ENXIO;
2622 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2623 FIELD_GET(IDR1_EVTQS, reg));
2624 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2625 FIELD_GET(IDR1_PRIQS, reg));
2627 /* SID/SSID sizes */
2628 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2629 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2631 /*
2632 * If the SMMU supports fewer bits than would fill a single L2 stream
2633 * table, use a linear table instead.
2634 */
2635 if (smmu->sid_bits <= STRTAB_SPLIT)
2636 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2638 /* IDR5 */
2639 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2641 /* Maximum number of outstanding stalls */
2642 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2644 /* Page sizes */
2645 if (reg & IDR5_GRAN64K)
2646 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2647 if (reg & IDR5_GRAN16K)
2648 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2649 if (reg & IDR5_GRAN4K)
2650 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2652 /* Input address size */
2653 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2654 smmu->features |= ARM_SMMU_FEAT_VAX;
2656 /* Output address size */
2657 switch (FIELD_GET(IDR5_OAS, reg)) {
2658 case IDR5_OAS_32_BIT:
2659 smmu->oas = 32;
2660 break;
2661 case IDR5_OAS_36_BIT:
2662 smmu->oas = 36;
2663 break;
2664 case IDR5_OAS_40_BIT:
2665 smmu->oas = 40;
2666 break;
2667 case IDR5_OAS_42_BIT:
2668 smmu->oas = 42;
2669 break;
2670 case IDR5_OAS_44_BIT:
2671 smmu->oas = 44;
2672 break;
2673 case IDR5_OAS_52_BIT:
2674 smmu->oas = 52;
2675 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2676 break;
2677 default:
2678 dev_info(smmu->dev,
2679 "unknown output address size. Truncating to 48-bit\n");
2680 /* Fallthrough */
2681 case IDR5_OAS_48_BIT:
2682 smmu->oas = 48;
2685 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2686 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2687 else
2688 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2690 /* Set the DMA mask for our table walker */
2691 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2692 dev_warn(smmu->dev,
2693 "failed to set DMA mask for table walker\n");
2695 smmu->ias = max(smmu->ias, smmu->oas);
2697 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2698 smmu->ias, smmu->oas, smmu->features);
2699 return 0;
2702 #ifdef CONFIG_ACPI
2703 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2705 switch (model) {
2706 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2707 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2708 break;
2709 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2710 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2711 break;
2714 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2717 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2718 struct arm_smmu_device *smmu)
2720 struct acpi_iort_smmu_v3 *iort_smmu;
2721 struct device *dev = smmu->dev;
2722 struct acpi_iort_node *node;
2724 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2726 /* Retrieve SMMUv3 specific data */
2727 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2729 acpi_smmu_get_options(iort_smmu->model, smmu);
2731 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2732 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2734 return 0;
2736 #else
2737 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2738 struct arm_smmu_device *smmu)
2740 return -ENODEV;
2742 #endif
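/*
 * The DT path below only validates #iommu-cells and picks up coherency
 * and driver options; interrupts are fetched later by name ("combined",
 * or "eventq"/"priq"/"gerror"). A minimal node might look roughly like
 * the following sketch - addresses, interrupt numbers and the register
 * window size are placeholders, not taken from any real platform:
 *
 *	smmu: iommu@2b400000 {
 *		compatible = "arm,smmu-v3";
 *		reg = <0x0 0x2b400000 0x0 0x20000>;
 *		interrupts = <GIC_SPI 74 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 75 IRQ_TYPE_EDGE_RISING>,
 *			     <GIC_SPI 77 IRQ_TYPE_EDGE_RISING>;
 *		interrupt-names = "eventq", "priq", "gerror";
 *		#iommu-cells = <1>;
 *		dma-coherent;
 *	};
 */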
2744 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2745 struct arm_smmu_device *smmu)
2747 struct device *dev = &pdev->dev;
2748 u32 cells;
2749 int ret = -EINVAL;
2751 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2752 dev_err(dev, "missing #iommu-cells property\n");
2753 else if (cells != 1)
2754 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2755 else
2756 ret = 0;
2758 parse_driver_options(smmu);
2760 if (of_dma_is_coherent(dev->of_node))
2761 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2763 return ret;
2766 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2768 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2769 return SZ_64K;
2770 else
2771 return SZ_128K;
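/*
 * Top-level probe flow: allocate the arm_smmu_device, query firmware (DT
 * or ACPI/IORT) for options and coherency, map the MMIO region, collect
 * any wired interrupts by name, read the ID registers, build the
 * in-memory queues and stream table, reset the hardware, and only then
 * register with the IOMMU core and the PCI/AMBA/platform buses.
 */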
2774 static int arm_smmu_device_probe(struct platform_device *pdev)
2776 int irq, ret;
2777 struct resource *res;
2778 resource_size_t ioaddr;
2779 struct arm_smmu_device *smmu;
2780 struct device *dev = &pdev->dev;
2781 bool bypass;
2783 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2784 if (!smmu) {
2785 dev_err(dev, "failed to allocate arm_smmu_device\n");
2786 return -ENOMEM;
2788 smmu->dev = dev;
2790 if (dev->of_node) {
2791 ret = arm_smmu_device_dt_probe(pdev, smmu);
2792 } else {
2793 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2794 if (ret == -ENODEV)
2795 return ret;
2798 /* Set bypass mode according to firmware probing result */
2799 bypass = !!ret;
2801 /* Base address */
2802 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2803 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2804 dev_err(dev, "MMIO region too small (%pr)\n", res);
2805 return -EINVAL;
2807 ioaddr = res->start;
2809 smmu->base = devm_ioremap_resource(dev, res);
2810 if (IS_ERR(smmu->base))
2811 return PTR_ERR(smmu->base);
2813 /* Interrupt lines */
2815 irq = platform_get_irq_byname(pdev, "combined");
2816 if (irq > 0)
2817 smmu->combined_irq = irq;
2818 else {
2819 irq = platform_get_irq_byname(pdev, "eventq");
2820 if (irq > 0)
2821 smmu->evtq.q.irq = irq;
2823 irq = platform_get_irq_byname(pdev, "priq");
2824 if (irq > 0)
2825 smmu->priq.q.irq = irq;
2827 irq = platform_get_irq_byname(pdev, "gerror");
2828 if (irq > 0)
2829 smmu->gerr_irq = irq;
2831 /* Probe the h/w */
2832 ret = arm_smmu_device_hw_probe(smmu);
2833 if (ret)
2834 return ret;
2836 /* Initialise in-memory data structures */
2837 ret = arm_smmu_init_structures(smmu);
2838 if (ret)
2839 return ret;
2841 /* Record our private device structure */
2842 platform_set_drvdata(pdev, smmu);
2844 /* Reset the device */
2845 ret = arm_smmu_device_reset(smmu, bypass);
2846 if (ret)
2847 return ret;
2849 /* And we're up. Go go go! */
2850 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2851 "smmu3.%pa", &ioaddr);
2852 if (ret)
2853 return ret;
2855 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2856 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2858 ret = iommu_device_register(&smmu->iommu);
2859 if (ret) {
2860 dev_err(dev, "Failed to register iommu\n");
2861 return ret;
2864 #ifdef CONFIG_PCI
2865 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2866 pci_request_acs();
2867 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2868 if (ret)
2869 return ret;
2871 #endif
2872 #ifdef CONFIG_ARM_AMBA
2873 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2874 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2875 if (ret)
2876 return ret;
2878 #endif
2879 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2880 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2881 if (ret)
2882 return ret;
2884 return 0;
2887 static int arm_smmu_device_remove(struct platform_device *pdev)
2889 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2891 arm_smmu_device_disable(smmu);
2893 return 0;
2896 static void arm_smmu_device_shutdown(struct platform_device *pdev)
2898 arm_smmu_device_remove(pdev);
2901 static const struct of_device_id arm_smmu_of_match[] = {
2902 { .compatible = "arm,smmu-v3", },
2903 { },
2905 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2907 static struct platform_driver arm_smmu_driver = {
2908 .driver = {
2909 .name = "arm-smmu-v3",
2910 .of_match_table = of_match_ptr(arm_smmu_of_match),
2912 .probe = arm_smmu_device_probe,
2913 .remove = arm_smmu_device_remove,
2914 .shutdown = arm_smmu_device_shutdown,
2916 module_platform_driver(arm_smmu_driver);
2918 IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3");
2920 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2921 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2922 MODULE_LICENSE("GPL v2");