Linux 4.19.133
[linux/fpc-iii.git] / drivers / iommu / arm-smmu-v3.c
blob6b7664052b5bece6c1aed8db73685081ddb7c379
1 /*
2 * IOMMU API for ARM architected SMMUv3 implementations.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 * Copyright (C) 2015 ARM Limited
18 * Author: Will Deacon <will.deacon@arm.com>
20 * This driver is powered by bad coffee and bombay mix.
23 #include <linux/acpi.h>
24 #include <linux/acpi_iort.h>
25 #include <linux/bitfield.h>
26 #include <linux/bitops.h>
27 #include <linux/crash_dump.h>
28 #include <linux/delay.h>
29 #include <linux/dma-iommu.h>
30 #include <linux/err.h>
31 #include <linux/interrupt.h>
32 #include <linux/iommu.h>
33 #include <linux/iopoll.h>
34 #include <linux/module.h>
35 #include <linux/msi.h>
36 #include <linux/of.h>
37 #include <linux/of_address.h>
38 #include <linux/of_iommu.h>
39 #include <linux/of_platform.h>
40 #include <linux/pci.h>
41 #include <linux/platform_device.h>
43 #include <linux/amba/bus.h>
45 #include "io-pgtable.h"
/*
 * MMIO registers
 *
 * Each ARM_SMMU_* value is a byte offset from the SMMU register base; the
 * indented names that follow it are fields or field values of that register.
 */
#define ARM_SMMU_IDR0			0x0
#define IDR0_ST_LVL			GENMASK(28, 27)
#define IDR0_ST_LVL_2LVL		1
#define IDR0_STALL_MODEL		GENMASK(25, 24)
#define IDR0_STALL_MODEL_STALL		0
#define IDR0_STALL_MODEL_FORCE		2
#define IDR0_TTENDIAN			GENMASK(22, 21)
#define IDR0_TTENDIAN_MIXED		0
#define IDR0_TTENDIAN_LE		2
#define IDR0_TTENDIAN_BE		3
#define IDR0_CD2L			(1 << 19)
#define IDR0_VMID16			(1 << 18)
#define IDR0_PRI			(1 << 16)
#define IDR0_SEV			(1 << 14)
#define IDR0_MSI			(1 << 13)
#define IDR0_ASID16			(1 << 12)
#define IDR0_ATS			(1 << 10)
#define IDR0_HYP			(1 << 9)
#define IDR0_COHACC			(1 << 4)
#define IDR0_TTF			GENMASK(3, 2)
#define IDR0_TTF_AARCH64		2
#define IDR0_TTF_AARCH32_64		3
#define IDR0_S1P			(1 << 1)
#define IDR0_S2P			(1 << 0)

#define ARM_SMMU_IDR1			0x4
#define IDR1_TABLES_PRESET		(1 << 30)
#define IDR1_QUEUES_PRESET		(1 << 29)
#define IDR1_REL			(1 << 28)
#define IDR1_CMDQS			GENMASK(25, 21)
#define IDR1_EVTQS			GENMASK(20, 16)
#define IDR1_PRIQS			GENMASK(15, 11)
#define IDR1_SSIDSIZE			GENMASK(10, 6)
#define IDR1_SIDSIZE			GENMASK(5, 0)

#define ARM_SMMU_IDR5			0x14
#define IDR5_STALL_MAX			GENMASK(31, 16)
#define IDR5_GRAN64K			(1 << 6)
#define IDR5_GRAN16K			(1 << 5)
#define IDR5_GRAN4K			(1 << 4)
#define IDR5_OAS			GENMASK(2, 0)
#define IDR5_OAS_32_BIT			0
#define IDR5_OAS_36_BIT			1
#define IDR5_OAS_40_BIT			2
#define IDR5_OAS_42_BIT			3
#define IDR5_OAS_44_BIT			4
#define IDR5_OAS_48_BIT			5
#define IDR5_OAS_52_BIT			6
#define IDR5_VAX			GENMASK(11, 10)
#define IDR5_VAX_52_BIT			1

#define ARM_SMMU_CR0			0x20
#define CR0_CMDQEN			(1 << 3)
#define CR0_EVTQEN			(1 << 2)
#define CR0_PRIQEN			(1 << 1)
#define CR0_SMMUEN			(1 << 0)

#define ARM_SMMU_CR0ACK			0x24

#define ARM_SMMU_CR1			0x28
#define CR1_TABLE_SH			GENMASK(11, 10)
#define CR1_TABLE_OC			GENMASK(9, 8)
#define CR1_TABLE_IC			GENMASK(7, 6)
#define CR1_QUEUE_SH			GENMASK(5, 4)
#define CR1_QUEUE_OC			GENMASK(3, 2)
#define CR1_QUEUE_IC			GENMASK(1, 0)
/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
#define CR1_CACHE_NC			0
#define CR1_CACHE_WB			1
#define CR1_CACHE_WT			2

#define ARM_SMMU_CR2			0x2c
#define CR2_PTM				(1 << 2)
#define CR2_RECINVSID			(1 << 1)
#define CR2_E2H				(1 << 0)

#define ARM_SMMU_GBPA			0x44
/*
 * Bit 31 must be built from an unsigned constant: (1 << 31) shifts into the
 * sign bit of int, which ISO C leaves undefined and which yields a negative
 * value in practice.
 */
#define GBPA_UPDATE			(1U << 31)
#define GBPA_ABORT			(1 << 20)

#define ARM_SMMU_IRQ_CTRL		0x50
#define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
#define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
#define IRQ_CTRL_GERROR_IRQEN		(1 << 0)

#define ARM_SMMU_IRQ_CTRLACK		0x54

#define ARM_SMMU_GERROR			0x60
#define GERROR_SFM_ERR			(1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
#define GERROR_MSI_EVTQ_ABT_ERR		(1 << 5)
#define GERROR_MSI_CMDQ_ABT_ERR		(1 << 4)
#define GERROR_PRIQ_ABT_ERR		(1 << 3)
#define GERROR_EVTQ_ABT_ERR		(1 << 2)
#define GERROR_CMDQ_ERR			(1 << 0)
#define GERROR_ERR_MASK			0xfd

#define ARM_SMMU_GERRORN		0x64

#define ARM_SMMU_GERROR_IRQ_CFG0	0x68
#define ARM_SMMU_GERROR_IRQ_CFG1	0x70
#define ARM_SMMU_GERROR_IRQ_CFG2	0x74

#define ARM_SMMU_STRTAB_BASE		0x80
#define STRTAB_BASE_RA			(1UL << 62)
#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)

#define ARM_SMMU_STRTAB_BASE_CFG	0x88
#define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
#define STRTAB_BASE_CFG_FMT_LINEAR	0
#define STRTAB_BASE_CFG_FMT_2LVL	1
#define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
#define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)

#define ARM_SMMU_CMDQ_BASE		0x90
#define ARM_SMMU_CMDQ_PROD		0x98
#define ARM_SMMU_CMDQ_CONS		0x9c

/*
 * The event and PRI queue PROD/CONS offsets lie above 64K, unlike the rest of
 * this list; arm_smmu_page1_fixup() folds such offsets down by SZ_64K when
 * ARM_SMMU_OPT_PAGE0_REGS_ONLY is set.
 */
#define ARM_SMMU_EVTQ_BASE		0xa0
#define ARM_SMMU_EVTQ_PROD		0x100a8
#define ARM_SMMU_EVTQ_CONS		0x100ac
#define ARM_SMMU_EVTQ_IRQ_CFG0		0xb0
#define ARM_SMMU_EVTQ_IRQ_CFG1		0xb8
#define ARM_SMMU_EVTQ_IRQ_CFG2		0xbc

#define ARM_SMMU_PRIQ_BASE		0xc0
#define ARM_SMMU_PRIQ_PROD		0x100c8
#define ARM_SMMU_PRIQ_CONS		0x100cc
#define ARM_SMMU_PRIQ_IRQ_CFG0		0xd0
#define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc

/* Common MSI config fields */
#define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
#define MSI_CFG2_SH			GENMASK(5, 4)
#define MSI_CFG2_MEMATTR		GENMASK(3, 0)

/* Common memory attribute values */
#define ARM_SMMU_SH_NSH			0
#define ARM_SMMU_SH_OSH			2
#define ARM_SMMU_SH_ISH			3
#define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
#define ARM_SMMU_MEMATTR_OIWB		0xf
/*
 * Queue pointer decoding. A prod/cons value packs, from the bottom up:
 *   - an entry index of (q)->max_n_shift bits		(Q_IDX)
 *   - a wrap bit immediately above the index		(Q_WRP)
 *   - an overflow flag in bit 31			(Q_OVF)
 * Q_ENT() yields the address of the entry that the pointer designates.
 */
#define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
#define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
/* Unsigned constant: (1 << 31) would shift into the sign bit of int */
#define Q_OVERFLOW_FLAG			(1U << 31)
#define Q_OVF(q, p)			((p) & Q_OVERFLOW_FLAG)
#define Q_ENT(q, p)			((q)->base +			\
					 Q_IDX(q, p) * (q)->ent_dwords)

/* Fields of the queue base registers */
#define Q_BASE_RWA			(1UL << 62)
#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
#define Q_BASE_LOG2SIZE			GENMASK(4, 0)
/*
 * Stream table.
 *
 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
 * 2lvl: 128k L1 entries,
 *       256 lazy entries per table (each table covers a PCI bus)
 */
#define STRTAB_L1_SZ_SHIFT		20
#define STRTAB_SPLIT			8

/* Level-1 descriptor: one 64-bit word holding a span and an L2 pointer */
#define STRTAB_L1_DESC_DWORDS		1
#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)

/* Stream table entry, dword 0 */
#define STRTAB_STE_DWORDS		8
#define STRTAB_STE_0_V			(1UL << 0)
#define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
#define STRTAB_STE_0_CFG_ABORT		0
#define STRTAB_STE_0_CFG_BYPASS		4
#define STRTAB_STE_0_CFG_S1_TRANS	5
#define STRTAB_STE_0_CFG_S2_TRANS	6

#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
#define STRTAB_STE_0_S1FMT_LINEAR	0
#define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
#define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)

/* Stream table entry, dword 1 */
#define STRTAB_STE_1_S1C_CACHE_NC	0UL
#define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
#define STRTAB_STE_1_S1C_CACHE_WT	2UL
#define STRTAB_STE_1_S1C_CACHE_WB	3UL
#define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
#define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)

#define STRTAB_STE_1_S1STALLD		(1UL << 27)

#define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
#define STRTAB_STE_1_EATS_ABT		0UL
#define STRTAB_STE_1_EATS_TRANS		1UL
#define STRTAB_STE_1_EATS_S1CHK		2UL

#define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
#define STRTAB_STE_1_STRW_NSEL1		0UL
#define STRTAB_STE_1_STRW_EL2		2UL

#define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
#define STRTAB_STE_1_SHCFG_INCOMING	1UL

/* Stream table entry, dword 2 */
#define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
#define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
#define STRTAB_STE_2_S2AA64		(1UL << 51)
#define STRTAB_STE_2_S2ENDI		(1UL << 52)
#define STRTAB_STE_2_S2PTW		(1UL << 54)
#define STRTAB_STE_2_S2R		(1UL << 58)

/* Stream table entry, dword 3 */
#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
/*
 * Context descriptor (stage-1 only)
 *
 * Each CTXDESC_CD_0_TCR_* field is paired with the ARM64_TCR_* mask giving
 * the position of the same field in the CPU TCR value, so that
 * ARM_SMMU_TCR2CD() can move a field from one layout to the other.
 */
#define CTXDESC_CD_DWORDS		8
#define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
#define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
#define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
#define ARM64_TCR_TG0			GENMASK_ULL(15, 14)
#define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
#define ARM64_TCR_IRGN0			GENMASK_ULL(9, 8)
#define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
#define ARM64_TCR_ORGN0			GENMASK_ULL(11, 10)
#define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
#define ARM64_TCR_SH0			GENMASK_ULL(13, 12)
#define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
#define ARM64_TCR_EPD0			(1ULL << 7)
#define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
#define ARM64_TCR_EPD1			(1ULL << 23)

#define CTXDESC_CD_0_ENDI		(1UL << 15)
#define CTXDESC_CD_0_V			(1UL << 31)

#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
#define ARM64_TCR_IPS			GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
#define ARM64_TCR_TBI0			(1ULL << 37)

#define CTXDESC_CD_0_AA64		(1UL << 41)
#define CTXDESC_CD_0_S			(1UL << 44)
#define CTXDESC_CD_0_R			(1UL << 45)
#define CTXDESC_CD_0_A			(1UL << 46)
#define CTXDESC_CD_0_ASET		(1UL << 47)
#define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)

#define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)

/* Convert between AArch64 (CPU) TCR format and SMMU CD format */
#define ARM_SMMU_TCR2CD(tcr, fld)	FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
					FIELD_GET(ARM64_TCR_##fld, tcr))
/* Command queue: entry size in 64-bit words and log2 of the entry-count cap */
#define CMDQ_ENT_DWORDS			2
#define CMDQ_MAX_SZ_SHIFT		8

/* Error code field of the CMDQ_CONS register and the codes handled here */
#define CMDQ_CONS_ERR			GENMASK(30, 24)
#define CMDQ_ERR_CERROR_NONE_IDX	0
#define CMDQ_ERR_CERROR_ILL_IDX		1
#define CMDQ_ERR_CERROR_ABT_IDX		2

/* Fields common to every command (dword 0) */
#define CMDQ_0_OP			GENMASK_ULL(7, 0)
#define CMDQ_0_SSV			(1UL << 11)

/* PREFETCH_CFG */
#define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
#define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
#define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)

/* CFGI_* */
#define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_CFGI_1_LEAF		(1UL << 0)
#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)

/* TLBI_* */
#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF		(1UL << 0)
#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)

/* PRI_RESP */
#define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
#define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
#define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)

/* CMD_SYNC */
#define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE		0
#define CMDQ_SYNC_0_CS_IRQ		1
#define CMDQ_SYNC_0_CS_SEV		2
#define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
#define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
#define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)

/* Event queue */
#define EVTQ_ENT_DWORDS			4
#define EVTQ_MAX_SZ_SHIFT		7

#define EVTQ_0_ID			GENMASK_ULL(7, 0)

/* PRI queue */
#define PRIQ_ENT_DWORDS			2
#define PRIQ_MAX_SZ_SHIFT		8

#define PRIQ_0_SID			GENMASK_ULL(31, 0)
#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
#define PRIQ_0_PERM_PRIV		(1UL << 58)
#define PRIQ_0_PERM_EXEC		(1UL << 59)
#define PRIQ_0_PERM_READ		(1UL << 60)
#define PRIQ_0_PERM_WRITE		(1UL << 61)
#define PRIQ_0_PRG_LAST			(1UL << 62)
#define PRIQ_0_SSID_V			(1UL << 63)

#define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
#define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
/* High-level queue structures */

/* Polling budgets, in microseconds, and the spin count used before backing off */
#define ARM_SMMU_POLL_TIMEOUT_US	100
#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US	1000000 /* 1s! */
#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT	10

/* Base and length of the MSI IOVA window */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
370 static bool disable_bypass = 1;
371 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
372 MODULE_PARM_DESC(disable_bypass,
373 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
/* Response codes accepted in the RESP field of a PRI_RESP command */
enum pri_resp {
	PRI_RESP_DENY	= 0,
	PRI_RESP_FAIL	= 1,
	PRI_RESP_SUCC	= 2,
};
/* Row indices into arm_smmu_msi_cfg[]; ARM_SMMU_MAX_MSIS is the row count */
enum arm_smmu_msi_index {
	EVTQ_MSI_INDEX,
	GERROR_MSI_INDEX,
	PRIQ_MSI_INDEX,
	ARM_SMMU_MAX_MSIS,
};
388 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
389 [EVTQ_MSI_INDEX] = {
390 ARM_SMMU_EVTQ_IRQ_CFG0,
391 ARM_SMMU_EVTQ_IRQ_CFG1,
392 ARM_SMMU_EVTQ_IRQ_CFG2,
394 [GERROR_MSI_INDEX] = {
395 ARM_SMMU_GERROR_IRQ_CFG0,
396 ARM_SMMU_GERROR_IRQ_CFG1,
397 ARM_SMMU_GERROR_IRQ_CFG2,
399 [PRIQ_MSI_INDEX] = {
400 ARM_SMMU_PRIQ_IRQ_CFG0,
401 ARM_SMMU_PRIQ_IRQ_CFG1,
402 ARM_SMMU_PRIQ_IRQ_CFG2,
406 struct arm_smmu_cmdq_ent {
407 /* Common fields */
408 u8 opcode;
409 bool substream_valid;
411 /* Command-specific fields */
412 union {
413 #define CMDQ_OP_PREFETCH_CFG 0x1
414 struct {
415 u32 sid;
416 u8 size;
417 u64 addr;
418 } prefetch;
420 #define CMDQ_OP_CFGI_STE 0x3
421 #define CMDQ_OP_CFGI_ALL 0x4
422 struct {
423 u32 sid;
424 union {
425 bool leaf;
426 u8 span;
428 } cfgi;
430 #define CMDQ_OP_TLBI_NH_ASID 0x11
431 #define CMDQ_OP_TLBI_NH_VA 0x12
432 #define CMDQ_OP_TLBI_EL2_ALL 0x20
433 #define CMDQ_OP_TLBI_S12_VMALL 0x28
434 #define CMDQ_OP_TLBI_S2_IPA 0x2a
435 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
436 struct {
437 u16 asid;
438 u16 vmid;
439 bool leaf;
440 u64 addr;
441 } tlbi;
443 #define CMDQ_OP_PRI_RESP 0x41
444 struct {
445 u32 sid;
446 u32 ssid;
447 u16 grpid;
448 enum pri_resp resp;
449 } pri;
451 #define CMDQ_OP_CMD_SYNC 0x46
452 struct {
453 u32 msidata;
454 u64 msiaddr;
455 } sync;
459 struct arm_smmu_queue {
460 int irq; /* Wired interrupt */
462 __le64 *base;
463 dma_addr_t base_dma;
464 u64 q_base;
466 size_t ent_dwords;
467 u32 max_n_shift;
468 u32 prod;
469 u32 cons;
471 u32 __iomem *prod_reg;
472 u32 __iomem *cons_reg;
475 struct arm_smmu_cmdq {
476 struct arm_smmu_queue q;
477 spinlock_t lock;
480 struct arm_smmu_evtq {
481 struct arm_smmu_queue q;
482 u32 max_stalls;
485 struct arm_smmu_priq {
486 struct arm_smmu_queue q;
489 /* High-level stream table and context descriptor structures */
490 struct arm_smmu_strtab_l1_desc {
491 u8 span;
493 __le64 *l2ptr;
494 dma_addr_t l2ptr_dma;
497 struct arm_smmu_s1_cfg {
498 __le64 *cdptr;
499 dma_addr_t cdptr_dma;
501 struct arm_smmu_ctx_desc {
502 u16 asid;
503 u64 ttbr;
504 u64 tcr;
505 u64 mair;
506 } cd;
509 struct arm_smmu_s2_cfg {
510 u16 vmid;
511 u64 vttbr;
512 u64 vtcr;
515 struct arm_smmu_strtab_ent {
517 * An STE is "assigned" if the master emitting the corresponding SID
518 * is attached to a domain. The behaviour of an unassigned STE is
519 * determined by the disable_bypass parameter, whereas an assigned
520 * STE behaves according to s1_cfg/s2_cfg, which themselves are
521 * configured according to the domain type.
523 bool assigned;
524 struct arm_smmu_s1_cfg *s1_cfg;
525 struct arm_smmu_s2_cfg *s2_cfg;
528 struct arm_smmu_strtab_cfg {
529 __le64 *strtab;
530 dma_addr_t strtab_dma;
531 struct arm_smmu_strtab_l1_desc *l1_desc;
532 unsigned int num_l1_ents;
534 u64 strtab_base;
535 u32 strtab_base_cfg;
538 /* An SMMUv3 instance */
539 struct arm_smmu_device {
540 struct device *dev;
541 void __iomem *base;
543 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
544 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
545 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
546 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
547 #define ARM_SMMU_FEAT_PRI (1 << 4)
548 #define ARM_SMMU_FEAT_ATS (1 << 5)
549 #define ARM_SMMU_FEAT_SEV (1 << 6)
550 #define ARM_SMMU_FEAT_MSI (1 << 7)
551 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
552 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
553 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
554 #define ARM_SMMU_FEAT_STALLS (1 << 11)
555 #define ARM_SMMU_FEAT_HYP (1 << 12)
556 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
557 #define ARM_SMMU_FEAT_VAX (1 << 14)
558 u32 features;
560 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
561 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
562 u32 options;
564 struct arm_smmu_cmdq cmdq;
565 struct arm_smmu_evtq evtq;
566 struct arm_smmu_priq priq;
568 int gerr_irq;
569 int combined_irq;
570 u32 sync_nr;
572 unsigned long ias; /* IPA */
573 unsigned long oas; /* PA */
574 unsigned long pgsize_bitmap;
576 #define ARM_SMMU_MAX_ASIDS (1 << 16)
577 unsigned int asid_bits;
578 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
580 #define ARM_SMMU_MAX_VMIDS (1 << 16)
581 unsigned int vmid_bits;
582 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
584 unsigned int ssid_bits;
585 unsigned int sid_bits;
587 struct arm_smmu_strtab_cfg strtab_cfg;
589 /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
590 union {
591 u32 sync_count;
592 u64 padding;
595 /* IOMMU core code handle */
596 struct iommu_device iommu;
599 /* SMMU private data for each master */
600 struct arm_smmu_master_data {
601 struct arm_smmu_device *smmu;
602 struct arm_smmu_strtab_ent ste;
/* SMMU private data for an IOMMU domain */

/* Translation mode of a domain; stage 1 is value 0 */
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};
613 struct arm_smmu_domain {
614 struct arm_smmu_device *smmu;
615 struct mutex init_mutex; /* Protects smmu pointer */
617 struct io_pgtable_ops *pgtbl_ops;
619 enum arm_smmu_domain_stage stage;
620 union {
621 struct arm_smmu_s1_cfg s1_cfg;
622 struct arm_smmu_s2_cfg s2_cfg;
625 struct iommu_domain domain;
628 struct arm_smmu_option_prop {
629 u32 opt;
630 const char *prop;
633 static struct arm_smmu_option_prop arm_smmu_options[] = {
634 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
635 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
636 { 0, NULL},
639 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
640 struct arm_smmu_device *smmu)
642 if ((offset > SZ_64K) &&
643 (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY))
644 offset -= SZ_64K;
646 return smmu->base + offset;
649 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
651 return container_of(dom, struct arm_smmu_domain, domain);
654 static void parse_driver_options(struct arm_smmu_device *smmu)
656 int i = 0;
658 do {
659 if (of_property_read_bool(smmu->dev->of_node,
660 arm_smmu_options[i].prop)) {
661 smmu->options |= arm_smmu_options[i].opt;
662 dev_notice(smmu->dev, "option %s\n",
663 arm_smmu_options[i].prop);
665 } while (arm_smmu_options[++i].opt);
668 /* Low-level queue manipulation functions */
669 static bool queue_full(struct arm_smmu_queue *q)
671 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
672 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
675 static bool queue_empty(struct arm_smmu_queue *q)
677 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
678 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
681 static void queue_sync_cons(struct arm_smmu_queue *q)
683 q->cons = readl_relaxed(q->cons_reg);
686 static void queue_inc_cons(struct arm_smmu_queue *q)
688 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
690 q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
693 * Ensure that all CPU accesses (reads and writes) to the queue
694 * are complete before we update the cons pointer.
696 mb();
697 writel_relaxed(q->cons, q->cons_reg);
700 static int queue_sync_prod(struct arm_smmu_queue *q)
702 int ret = 0;
703 u32 prod = readl_relaxed(q->prod_reg);
705 if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
706 ret = -EOVERFLOW;
708 q->prod = prod;
709 return ret;
712 static void queue_inc_prod(struct arm_smmu_queue *q)
714 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
716 q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
717 writel(q->prod, q->prod_reg);
721 * Wait for the SMMU to consume items. If drain is true, wait until the queue
722 * is empty. Otherwise, wait until there is at least one free slot.
724 static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
726 ktime_t timeout;
727 unsigned int delay = 1, spin_cnt = 0;
729 /* Wait longer if it's a CMD_SYNC */
730 timeout = ktime_add_us(ktime_get(), sync ?
731 ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
732 ARM_SMMU_POLL_TIMEOUT_US);
734 while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
735 if (ktime_compare(ktime_get(), timeout) > 0)
736 return -ETIMEDOUT;
738 if (wfe) {
739 wfe();
740 } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
741 cpu_relax();
742 continue;
743 } else {
744 udelay(delay);
745 delay *= 2;
746 spin_cnt = 0;
750 return 0;
753 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
755 int i;
757 for (i = 0; i < n_dwords; ++i)
758 *dst++ = cpu_to_le64(*src++);
761 static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
763 if (queue_full(q))
764 return -ENOSPC;
766 queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
767 queue_inc_prod(q);
768 return 0;
771 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
773 int i;
775 for (i = 0; i < n_dwords; ++i)
776 *dst++ = le64_to_cpu(*src++);
779 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
781 if (queue_empty(q))
782 return -EAGAIN;
784 queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
785 queue_inc_cons(q);
786 return 0;
789 /* High-level queue accessors */
790 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
792 memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
793 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
795 switch (ent->opcode) {
796 case CMDQ_OP_TLBI_EL2_ALL:
797 case CMDQ_OP_TLBI_NSNH_ALL:
798 break;
799 case CMDQ_OP_PREFETCH_CFG:
800 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
801 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
802 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
803 break;
804 case CMDQ_OP_CFGI_STE:
805 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
806 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
807 break;
808 case CMDQ_OP_CFGI_ALL:
809 /* Cover the entire SID range */
810 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
811 break;
812 case CMDQ_OP_TLBI_NH_VA:
813 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
814 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
815 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
816 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
817 break;
818 case CMDQ_OP_TLBI_S2_IPA:
819 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
820 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
821 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
822 break;
823 case CMDQ_OP_TLBI_NH_ASID:
824 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
825 /* Fallthrough */
826 case CMDQ_OP_TLBI_S12_VMALL:
827 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
828 break;
829 case CMDQ_OP_PRI_RESP:
830 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
831 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
832 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
833 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
834 switch (ent->pri.resp) {
835 case PRI_RESP_DENY:
836 case PRI_RESP_FAIL:
837 case PRI_RESP_SUCC:
838 break;
839 default:
840 return -EINVAL;
842 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
843 break;
844 case CMDQ_OP_CMD_SYNC:
845 if (ent->sync.msiaddr)
846 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
847 else
848 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
849 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
850 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
852 * Commands are written little-endian, but we want the SMMU to
853 * receive MSIData, and thus write it back to memory, in CPU
854 * byte order, so big-endian needs an extra byteswap here.
856 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
857 cpu_to_le32(ent->sync.msidata));
858 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
859 break;
860 default:
861 return -ENOENT;
864 return 0;
867 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
869 static const char *cerror_str[] = {
870 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
871 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
872 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
875 int i;
876 u64 cmd[CMDQ_ENT_DWORDS];
877 struct arm_smmu_queue *q = &smmu->cmdq.q;
878 u32 cons = readl_relaxed(q->cons_reg);
879 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
880 struct arm_smmu_cmdq_ent cmd_sync = {
881 .opcode = CMDQ_OP_CMD_SYNC,
884 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
885 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
887 switch (idx) {
888 case CMDQ_ERR_CERROR_ABT_IDX:
889 dev_err(smmu->dev, "retrying command fetch\n");
890 case CMDQ_ERR_CERROR_NONE_IDX:
891 return;
892 case CMDQ_ERR_CERROR_ILL_IDX:
893 /* Fallthrough */
894 default:
895 break;
899 * We may have concurrent producers, so we need to be careful
900 * not to touch any of the shadow cmdq state.
902 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
903 dev_err(smmu->dev, "skipping command in error state:\n");
904 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
905 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
907 /* Convert the erroneous command into a CMD_SYNC */
908 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
909 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
910 return;
913 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
916 static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
918 struct arm_smmu_queue *q = &smmu->cmdq.q;
919 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
921 while (queue_insert_raw(q, cmd) == -ENOSPC) {
922 if (queue_poll_cons(q, false, wfe))
923 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
927 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
928 struct arm_smmu_cmdq_ent *ent)
930 u64 cmd[CMDQ_ENT_DWORDS];
931 unsigned long flags;
933 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
934 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
935 ent->opcode);
936 return;
939 spin_lock_irqsave(&smmu->cmdq.lock, flags);
940 arm_smmu_cmdq_insert_cmd(smmu, cmd);
941 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
945 * The difference between val and sync_idx is bounded by the maximum size of
946 * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
948 static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
950 ktime_t timeout;
951 u32 val;
953 timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
954 val = smp_cond_load_acquire(&smmu->sync_count,
955 (int)(VAL - sync_idx) >= 0 ||
956 !ktime_before(ktime_get(), timeout));
958 return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
961 static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
963 u64 cmd[CMDQ_ENT_DWORDS];
964 unsigned long flags;
965 struct arm_smmu_cmdq_ent ent = {
966 .opcode = CMDQ_OP_CMD_SYNC,
967 .sync = {
968 .msiaddr = virt_to_phys(&smmu->sync_count),
972 spin_lock_irqsave(&smmu->cmdq.lock, flags);
973 ent.sync.msidata = ++smmu->sync_nr;
974 arm_smmu_cmdq_build_cmd(cmd, &ent);
975 arm_smmu_cmdq_insert_cmd(smmu, cmd);
976 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
978 return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
981 static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
983 u64 cmd[CMDQ_ENT_DWORDS];
984 unsigned long flags;
985 bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
986 struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
987 int ret;
989 arm_smmu_cmdq_build_cmd(cmd, &ent);
991 spin_lock_irqsave(&smmu->cmdq.lock, flags);
992 arm_smmu_cmdq_insert_cmd(smmu, cmd);
993 ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
994 spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
996 return ret;
999 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1001 int ret;
1002 bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
1003 (smmu->features & ARM_SMMU_FEAT_COHERENCY);
1005 ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
1006 : __arm_smmu_cmdq_issue_sync(smmu);
1007 if (ret)
1008 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
1011 /* Context descriptor manipulation functions */
1012 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
1014 u64 val = 0;
1016 /* Repack the TCR. Just care about TTBR0 for now */
1017 val |= ARM_SMMU_TCR2CD(tcr, T0SZ);
1018 val |= ARM_SMMU_TCR2CD(tcr, TG0);
1019 val |= ARM_SMMU_TCR2CD(tcr, IRGN0);
1020 val |= ARM_SMMU_TCR2CD(tcr, ORGN0);
1021 val |= ARM_SMMU_TCR2CD(tcr, SH0);
1022 val |= ARM_SMMU_TCR2CD(tcr, EPD0);
1023 val |= ARM_SMMU_TCR2CD(tcr, EPD1);
1024 val |= ARM_SMMU_TCR2CD(tcr, IPS);
1025 val |= ARM_SMMU_TCR2CD(tcr, TBI0);
1027 return val;
1030 static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
1031 struct arm_smmu_s1_cfg *cfg)
1033 u64 val;
1036 * We don't need to issue any invalidation here, as we'll invalidate
1037 * the STE when installing the new entry anyway.
1039 val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) |
1040 #ifdef __BIG_ENDIAN
1041 CTXDESC_CD_0_ENDI |
1042 #endif
1043 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1044 CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
1045 CTXDESC_CD_0_V;
1047 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1048 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1049 val |= CTXDESC_CD_0_S;
1051 cfg->cdptr[0] = cpu_to_le64(val);
1053 val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
1054 cfg->cdptr[1] = cpu_to_le64(val);
1056 cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
1059 /* Stream table manipulation functions */
1060 static void
1061 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1063 u64 val = 0;
1065 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1066 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1068 *dst = cpu_to_le64(val);
1071 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1073 struct arm_smmu_cmdq_ent cmd = {
1074 .opcode = CMDQ_OP_CFGI_STE,
1075 .cfgi = {
1076 .sid = sid,
1077 .leaf = true,
1081 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1082 arm_smmu_cmdq_issue_sync(smmu);
/*
 * Rewrite the stream table entry at @dst for stream ID @sid according to
 * @ste.
 *
 * @smmu may be NULL (as passed by arm_smmu_init_bypass_stes()); in that
 * case only the bypass/fault path is taken and no commands are issued.
 * For translation STEs, dwords 1-3 are written first, the STE is synced,
 * and only then is dword 0 published with WRITE_ONCE() and synced again.
 */
static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
				      __le64 *dst, struct arm_smmu_strtab_ent *ste)
{
	/*
	 * This is hideously complicated, but we only really care about
	 * three cases at the moment:
	 *
	 * 1. Invalid (all zero) -> bypass/fault (init)
	 * 2. Bypass/fault -> translation/bypass (attach)
	 * 3. Translation/bypass -> bypass/fault (detach)
	 *
	 * Given that we can't update the STE atomically and the SMMU
	 * doesn't read the thing in a defined order, that leaves us
	 * with the following maintenance requirements:
	 *
	 * 1. Update Config, return (init time STEs aren't live)
	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
	 * 3. Update Config, sync
	 */
	u64 val = le64_to_cpu(dst[0]);
	bool ste_live = false;
	struct arm_smmu_cmdq_ent prefetch_cmd = {
		.opcode		= CMDQ_OP_PREFETCH_CFG,
		.prefetch	= {
			.sid	= sid,
		},
	};

	/* Classify the STE currently in memory before overwriting it. */
	if (val & STRTAB_STE_0_V) {
		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
		case STRTAB_STE_0_CFG_BYPASS:
			break;
		case STRTAB_STE_0_CFG_S1_TRANS:
		case STRTAB_STE_0_CFG_S2_TRANS:
			ste_live = true;
			break;
		case STRTAB_STE_0_CFG_ABORT:
			if (disable_bypass)
				break;
			/* fall through: an ABORT STE is unexpected otherwise */
		default:
			BUG(); /* STE corruption */
		}
	}

	/* Nuke the existing STE_0 value, as we're going to rewrite it */
	val = STRTAB_STE_0_V;

	/* Bypass/fault */
	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
		if (!ste->assigned && disable_bypass)
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
		else
			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */
		/*
		 * The SMMU can perform negative caching, so we must sync
		 * the STE regardless of whether the old value was live.
		 */
		if (smmu)
			arm_smmu_sync_ste_for_sid(smmu, sid);
		return;
	}

	/* Stage-1 translation: fill dword 1 and the context pointer. */
	if (ste->s1_cfg) {
		BUG_ON(ste_live);
		dst[1] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
#ifdef CONFIG_PCI_ATS
			 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
#endif
			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));

		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);

		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
	}

	/* Stage-2 translation: fill dwords 2 and 3. */
	if (ste->s2_cfg) {
		BUG_ON(ste_live);
		dst[2] = cpu_to_le64(
			 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
			 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
#ifdef __BIG_ENDIAN
			 STRTAB_STE_2_S2ENDI |
#endif
			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
			 STRTAB_STE_2_S2R);

		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);

		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
	}

	/* Sync dwords 1-3 before dword 0 is published. */
	arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(val));
	arm_smmu_sync_ste_for_sid(smmu, sid);

	/* It's likely that we'll want to use the new STE soon */
	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
1197 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1199 unsigned int i;
1200 struct arm_smmu_strtab_ent ste = { .assigned = false };
1202 for (i = 0; i < nent; ++i) {
1203 arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
1204 strtab += STRTAB_STE_DWORDS;
1208 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1210 size_t size;
1211 void *strtab;
1212 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1213 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1215 if (desc->l2ptr)
1216 return 0;
1218 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1219 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1221 desc->span = STRTAB_SPLIT + 1;
1222 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1223 GFP_KERNEL | __GFP_ZERO);
1224 if (!desc->l2ptr) {
1225 dev_err(smmu->dev,
1226 "failed to allocate l2 stream table for SID %u\n",
1227 sid);
1228 return -ENOMEM;
1231 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1232 arm_smmu_write_strtab_l1_desc(strtab, desc);
1233 return 0;
1236 /* IRQ and event handlers */
1237 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1239 int i;
1240 struct arm_smmu_device *smmu = dev;
1241 struct arm_smmu_queue *q = &smmu->evtq.q;
1242 u64 evt[EVTQ_ENT_DWORDS];
1244 do {
1245 while (!queue_remove_raw(q, evt)) {
1246 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1248 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1249 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1250 dev_info(smmu->dev, "\t0x%016llx\n",
1251 (unsigned long long)evt[i]);
1256 * Not much we can do on overflow, so scream and pretend we're
1257 * trying harder.
1259 if (queue_sync_prod(q) == -EOVERFLOW)
1260 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1261 } while (!queue_empty(q));
1263 /* Sync our overflow flag, as we believe we're up to speed */
1264 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1265 return IRQ_HANDLED;
1268 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1270 u32 sid, ssid;
1271 u16 grpid;
1272 bool ssv, last;
1274 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1275 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1276 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
1277 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1278 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1280 dev_info(smmu->dev, "unexpected PRI request received:\n");
1281 dev_info(smmu->dev,
1282 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1283 sid, ssid, grpid, last ? "L" : "",
1284 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1285 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1286 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1287 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1288 evt[1] & PRIQ_1_ADDR_MASK);
1290 if (last) {
1291 struct arm_smmu_cmdq_ent cmd = {
1292 .opcode = CMDQ_OP_PRI_RESP,
1293 .substream_valid = ssv,
1294 .pri = {
1295 .sid = sid,
1296 .ssid = ssid,
1297 .grpid = grpid,
1298 .resp = PRI_RESP_DENY,
1302 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1306 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1308 struct arm_smmu_device *smmu = dev;
1309 struct arm_smmu_queue *q = &smmu->priq.q;
1310 u64 evt[PRIQ_ENT_DWORDS];
1312 do {
1313 while (!queue_remove_raw(q, evt))
1314 arm_smmu_handle_ppr(smmu, evt);
1316 if (queue_sync_prod(q) == -EOVERFLOW)
1317 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1318 } while (!queue_empty(q));
1320 /* Sync our overflow flag, as we believe we're up to speed */
1321 q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
1322 writel(q->cons, q->cons_reg);
1323 return IRQ_HANDLED;
/* Forward declaration: called from arm_smmu_gerror_handler() below. */
static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1328 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1330 u32 gerror, gerrorn, active;
1331 struct arm_smmu_device *smmu = dev;
1333 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1334 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1336 active = gerror ^ gerrorn;
1337 if (!(active & GERROR_ERR_MASK))
1338 return IRQ_NONE; /* No errors pending */
1340 dev_warn(smmu->dev,
1341 "unexpected global error reported (0x%08x), this could be serious\n",
1342 active);
1344 if (active & GERROR_SFM_ERR) {
1345 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1346 arm_smmu_device_disable(smmu);
1349 if (active & GERROR_MSI_GERROR_ABT_ERR)
1350 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1352 if (active & GERROR_MSI_PRIQ_ABT_ERR)
1353 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1355 if (active & GERROR_MSI_EVTQ_ABT_ERR)
1356 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1358 if (active & GERROR_MSI_CMDQ_ABT_ERR)
1359 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1361 if (active & GERROR_PRIQ_ABT_ERR)
1362 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1364 if (active & GERROR_EVTQ_ABT_ERR)
1365 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1367 if (active & GERROR_CMDQ_ERR)
1368 arm_smmu_cmdq_skip_err(smmu);
1370 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1371 return IRQ_HANDLED;
1374 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1376 struct arm_smmu_device *smmu = dev;
1378 arm_smmu_evtq_thread(irq, dev);
1379 if (smmu->features & ARM_SMMU_FEAT_PRI)
1380 arm_smmu_priq_thread(irq, dev);
1382 return IRQ_HANDLED;
1385 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1387 arm_smmu_gerror_handler(irq, dev);
1388 return IRQ_WAKE_THREAD;
1391 /* IO_PGTABLE API */
/* TLB sync for @smmu: implemented as a command-queue sync. */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
	arm_smmu_cmdq_issue_sync(smmu);
}
1397 static void arm_smmu_tlb_sync(void *cookie)
1399 struct arm_smmu_domain *smmu_domain = cookie;
1400 __arm_smmu_tlb_sync(smmu_domain->smmu);
1403 static void arm_smmu_tlb_inv_context(void *cookie)
1405 struct arm_smmu_domain *smmu_domain = cookie;
1406 struct arm_smmu_device *smmu = smmu_domain->smmu;
1407 struct arm_smmu_cmdq_ent cmd;
1409 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1410 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
1411 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1412 cmd.tlbi.vmid = 0;
1413 } else {
1414 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
1415 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1418 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1419 __arm_smmu_tlb_sync(smmu);
1422 static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
1423 size_t granule, bool leaf, void *cookie)
1425 struct arm_smmu_domain *smmu_domain = cookie;
1426 struct arm_smmu_device *smmu = smmu_domain->smmu;
1427 struct arm_smmu_cmdq_ent cmd = {
1428 .tlbi = {
1429 .leaf = leaf,
1430 .addr = iova,
1434 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1435 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
1436 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
1437 } else {
1438 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
1439 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
1442 do {
1443 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1444 cmd.tlbi.addr += granule;
1445 } while (size -= granule);
/*
 * TLB maintenance callbacks; arm_smmu_domain_finalise() installs this
 * table as io_pgtable_cfg.tlb.
 */
static const struct iommu_gather_ops arm_smmu_gather_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync,
};
1454 /* IOMMU API */
1455 static bool arm_smmu_capable(enum iommu_cap cap)
1457 switch (cap) {
1458 case IOMMU_CAP_CACHE_COHERENCY:
1459 return true;
1460 case IOMMU_CAP_NOEXEC:
1461 return true;
1462 default:
1463 return false;
1467 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
1469 struct arm_smmu_domain *smmu_domain;
1471 if (type != IOMMU_DOMAIN_UNMANAGED &&
1472 type != IOMMU_DOMAIN_DMA &&
1473 type != IOMMU_DOMAIN_IDENTITY)
1474 return NULL;
1477 * Allocate the domain and initialise some of its data structures.
1478 * We can't really do anything meaningful until we've added a
1479 * master.
1481 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
1482 if (!smmu_domain)
1483 return NULL;
1485 if (type == IOMMU_DOMAIN_DMA &&
1486 iommu_get_dma_cookie(&smmu_domain->domain)) {
1487 kfree(smmu_domain);
1488 return NULL;
1491 mutex_init(&smmu_domain->init_mutex);
1492 return &smmu_domain->domain;
1495 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
1497 int idx, size = 1 << span;
1499 do {
1500 idx = find_first_zero_bit(map, size);
1501 if (idx == size)
1502 return -ENOSPC;
1503 } while (test_and_set_bit(idx, map));
1505 return idx;
/* Release bit @idx of @map, the counterpart of arm_smmu_bitmap_alloc(). */
static void arm_smmu_bitmap_free(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
1513 static void arm_smmu_domain_free(struct iommu_domain *domain)
1515 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1516 struct arm_smmu_device *smmu = smmu_domain->smmu;
1518 iommu_put_dma_cookie(domain);
1519 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
1521 /* Free the CD and ASID, if we allocated them */
1522 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1523 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1525 if (cfg->cdptr) {
1526 dmam_free_coherent(smmu_domain->smmu->dev,
1527 CTXDESC_CD_DWORDS << 3,
1528 cfg->cdptr,
1529 cfg->cdptr_dma);
1531 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
1533 } else {
1534 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1535 if (cfg->vmid)
1536 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
1539 kfree(smmu_domain);
1542 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
1543 struct io_pgtable_cfg *pgtbl_cfg)
1545 int ret;
1546 int asid;
1547 struct arm_smmu_device *smmu = smmu_domain->smmu;
1548 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1550 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
1551 if (asid < 0)
1552 return asid;
1554 cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3,
1555 &cfg->cdptr_dma,
1556 GFP_KERNEL | __GFP_ZERO);
1557 if (!cfg->cdptr) {
1558 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1559 ret = -ENOMEM;
1560 goto out_free_asid;
1563 cfg->cd.asid = (u16)asid;
1564 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
1565 cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
1566 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
1567 return 0;
1569 out_free_asid:
1570 arm_smmu_bitmap_free(smmu->asid_map, asid);
1571 return ret;
1574 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
1575 struct io_pgtable_cfg *pgtbl_cfg)
1577 int vmid;
1578 struct arm_smmu_device *smmu = smmu_domain->smmu;
1579 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
1581 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
1582 if (vmid < 0)
1583 return vmid;
1585 cfg->vmid = (u16)vmid;
1586 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
1587 cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
1588 return 0;
1591 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
1593 int ret;
1594 unsigned long ias, oas;
1595 enum io_pgtable_fmt fmt;
1596 struct io_pgtable_cfg pgtbl_cfg;
1597 struct io_pgtable_ops *pgtbl_ops;
1598 int (*finalise_stage_fn)(struct arm_smmu_domain *,
1599 struct io_pgtable_cfg *);
1600 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1601 struct arm_smmu_device *smmu = smmu_domain->smmu;
1603 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
1604 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
1605 return 0;
1608 /* Restrict the stage to what we can actually support */
1609 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
1610 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
1611 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
1612 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1614 switch (smmu_domain->stage) {
1615 case ARM_SMMU_DOMAIN_S1:
1616 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
1617 ias = min_t(unsigned long, ias, VA_BITS);
1618 oas = smmu->ias;
1619 fmt = ARM_64_LPAE_S1;
1620 finalise_stage_fn = arm_smmu_domain_finalise_s1;
1621 break;
1622 case ARM_SMMU_DOMAIN_NESTED:
1623 case ARM_SMMU_DOMAIN_S2:
1624 ias = smmu->ias;
1625 oas = smmu->oas;
1626 fmt = ARM_64_LPAE_S2;
1627 finalise_stage_fn = arm_smmu_domain_finalise_s2;
1628 break;
1629 default:
1630 return -EINVAL;
1633 pgtbl_cfg = (struct io_pgtable_cfg) {
1634 .pgsize_bitmap = smmu->pgsize_bitmap,
1635 .ias = ias,
1636 .oas = oas,
1637 .tlb = &arm_smmu_gather_ops,
1638 .iommu_dev = smmu->dev,
1641 if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
1642 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
1644 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
1645 if (!pgtbl_ops)
1646 return -ENOMEM;
1648 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
1649 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
1650 domain->geometry.force_aperture = true;
1652 ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
1653 if (ret < 0) {
1654 free_io_pgtable_ops(pgtbl_ops);
1655 return ret;
1658 smmu_domain->pgtbl_ops = pgtbl_ops;
1659 return 0;
1662 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
1664 __le64 *step;
1665 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1667 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1668 struct arm_smmu_strtab_l1_desc *l1_desc;
1669 int idx;
1671 /* Two-level walk */
1672 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
1673 l1_desc = &cfg->l1_desc[idx];
1674 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
1675 step = &l1_desc->l2ptr[idx];
1676 } else {
1677 /* Simple linear lookup */
1678 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
1681 return step;
1684 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
1686 int i, j;
1687 struct arm_smmu_master_data *master = fwspec->iommu_priv;
1688 struct arm_smmu_device *smmu = master->smmu;
1690 for (i = 0; i < fwspec->num_ids; ++i) {
1691 u32 sid = fwspec->ids[i];
1692 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
1694 /* Bridged PCI devices may end up with duplicated IDs */
1695 for (j = 0; j < i; j++)
1696 if (fwspec->ids[j] == sid)
1697 break;
1698 if (j < i)
1699 continue;
1701 arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
1705 static void arm_smmu_detach_dev(struct device *dev)
1707 struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
1709 master->ste.assigned = false;
1710 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1713 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1715 int ret = 0;
1716 struct arm_smmu_device *smmu;
1717 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1718 struct arm_smmu_master_data *master;
1719 struct arm_smmu_strtab_ent *ste;
1721 if (!dev->iommu_fwspec)
1722 return -ENOENT;
1724 master = dev->iommu_fwspec->iommu_priv;
1725 smmu = master->smmu;
1726 ste = &master->ste;
1728 /* Already attached to a different domain? */
1729 if (ste->assigned)
1730 arm_smmu_detach_dev(dev);
1732 mutex_lock(&smmu_domain->init_mutex);
1734 if (!smmu_domain->smmu) {
1735 smmu_domain->smmu = smmu;
1736 ret = arm_smmu_domain_finalise(domain);
1737 if (ret) {
1738 smmu_domain->smmu = NULL;
1739 goto out_unlock;
1741 } else if (smmu_domain->smmu != smmu) {
1742 dev_err(dev,
1743 "cannot attach to SMMU %s (upstream of %s)\n",
1744 dev_name(smmu_domain->smmu->dev),
1745 dev_name(smmu->dev));
1746 ret = -ENXIO;
1747 goto out_unlock;
1750 ste->assigned = true;
1752 if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
1753 ste->s1_cfg = NULL;
1754 ste->s2_cfg = NULL;
1755 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1756 ste->s1_cfg = &smmu_domain->s1_cfg;
1757 ste->s2_cfg = NULL;
1758 arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
1759 } else {
1760 ste->s1_cfg = NULL;
1761 ste->s2_cfg = &smmu_domain->s2_cfg;
1764 arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
1765 out_unlock:
1766 mutex_unlock(&smmu_domain->init_mutex);
1767 return ret;
1770 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1771 phys_addr_t paddr, size_t size, int prot)
1773 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1775 if (!ops)
1776 return -ENODEV;
1778 return ops->map(ops, iova, paddr, size, prot);
1781 static size_t
1782 arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1784 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1786 if (!ops)
1787 return 0;
1789 return ops->unmap(ops, iova, size);
1792 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
1794 struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1796 if (smmu)
1797 __arm_smmu_tlb_sync(smmu);
1800 static phys_addr_t
1801 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1803 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1805 if (domain->type == IOMMU_DOMAIN_IDENTITY)
1806 return iova;
1808 if (!ops)
1809 return 0;
1811 return ops->iova_to_phys(ops, iova);
/* Forward declaration: used by arm_smmu_get_by_fwnode() below. */
static struct platform_driver arm_smmu_driver;
1816 static int arm_smmu_match_node(struct device *dev, void *data)
1818 return dev->fwnode == data;
1821 static
1822 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1824 struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1825 fwnode, arm_smmu_match_node);
1826 put_device(dev);
1827 return dev ? dev_get_drvdata(dev) : NULL;
1830 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
1832 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
1834 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
1835 limit *= 1UL << STRTAB_SPLIT;
1837 return sid < limit;
/* Forward declaration: compared against fwspec->ops in add/remove_device. */
static struct iommu_ops arm_smmu_ops;
1842 static int arm_smmu_add_device(struct device *dev)
1844 int i, ret;
1845 struct arm_smmu_device *smmu;
1846 struct arm_smmu_master_data *master;
1847 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1848 struct iommu_group *group;
1850 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1851 return -ENODEV;
1853 * We _can_ actually withstand dodgy bus code re-calling add_device()
1854 * without an intervening remove_device()/of_xlate() sequence, but
1855 * we're not going to do so quietly...
1857 if (WARN_ON_ONCE(fwspec->iommu_priv)) {
1858 master = fwspec->iommu_priv;
1859 smmu = master->smmu;
1860 } else {
1861 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1862 if (!smmu)
1863 return -ENODEV;
1864 master = kzalloc(sizeof(*master), GFP_KERNEL);
1865 if (!master)
1866 return -ENOMEM;
1868 master->smmu = smmu;
1869 fwspec->iommu_priv = master;
1872 /* Check the SIDs are in range of the SMMU and our stream table */
1873 for (i = 0; i < fwspec->num_ids; i++) {
1874 u32 sid = fwspec->ids[i];
1876 if (!arm_smmu_sid_in_range(smmu, sid))
1877 return -ERANGE;
1879 /* Ensure l2 strtab is initialised */
1880 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
1881 ret = arm_smmu_init_l2_strtab(smmu, sid);
1882 if (ret)
1883 return ret;
1887 group = iommu_group_get_for_dev(dev);
1888 if (!IS_ERR(group)) {
1889 iommu_group_put(group);
1890 iommu_device_link(&smmu->iommu, dev);
1893 return PTR_ERR_OR_ZERO(group);
1896 static void arm_smmu_remove_device(struct device *dev)
1898 struct iommu_fwspec *fwspec = dev->iommu_fwspec;
1899 struct arm_smmu_master_data *master;
1900 struct arm_smmu_device *smmu;
1902 if (!fwspec || fwspec->ops != &arm_smmu_ops)
1903 return;
1905 master = fwspec->iommu_priv;
1906 smmu = master->smmu;
1907 if (master && master->ste.assigned)
1908 arm_smmu_detach_dev(dev);
1909 iommu_group_remove_device(dev);
1910 iommu_device_unlink(&smmu->iommu, dev);
1911 kfree(master);
1912 iommu_fwspec_free(dev);
/*
 * Pick the IOMMU group for @dev: the PCI grouping for PCI devices, a
 * generic group for everything else.
 */
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	/*
	 * We don't support devices sharing stream IDs other than PCI RID
	 * aliases, since the necessary ID-to-device lookup becomes rather
	 * impractical given a potential sparse 32-bit stream ID space.
	 */
	if (!dev_is_pci(dev))
		return generic_device_group(dev);

	return pci_device_group(dev);
}
1932 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
1933 enum iommu_attr attr, void *data)
1935 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1937 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1938 return -EINVAL;
1940 switch (attr) {
1941 case DOMAIN_ATTR_NESTING:
1942 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
1943 return 0;
1944 default:
1945 return -ENODEV;
1949 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
1950 enum iommu_attr attr, void *data)
1952 int ret = 0;
1953 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1955 if (domain->type != IOMMU_DOMAIN_UNMANAGED)
1956 return -EINVAL;
1958 mutex_lock(&smmu_domain->init_mutex);
1960 switch (attr) {
1961 case DOMAIN_ATTR_NESTING:
1962 if (smmu_domain->smmu) {
1963 ret = -EPERM;
1964 goto out_unlock;
1967 if (*(int *)data)
1968 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1969 else
1970 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
1972 break;
1973 default:
1974 ret = -ENODEV;
1977 out_unlock:
1978 mutex_unlock(&smmu_domain->init_mutex);
1979 return ret;
1982 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1984 return iommu_fwspec_add_ids(dev, args->args, 1);
1987 static void arm_smmu_get_resv_regions(struct device *dev,
1988 struct list_head *head)
1990 struct iommu_resv_region *region;
1991 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1993 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1994 prot, IOMMU_RESV_SW_MSI);
1995 if (!region)
1996 return;
1998 list_add_tail(&region->list, head);
2000 iommu_dma_get_resv_regions(dev, head);
2003 static void arm_smmu_put_resv_regions(struct device *dev,
2004 struct list_head *head)
2006 struct iommu_resv_region *entry, *next;
2008 list_for_each_entry_safe(entry, next, head, list)
2009 kfree(entry);
/*
 * IOMMU API entry points implemented by this driver.  Both
 * flush_iotlb_all and iotlb_sync map to arm_smmu_iotlb_sync().
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.flush_iotlb_all	= arm_smmu_iotlb_sync,
	.iotlb_sync		= arm_smmu_iotlb_sync,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};
2033 /* Probing and initialisation functions */
2034 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2035 struct arm_smmu_queue *q,
2036 unsigned long prod_off,
2037 unsigned long cons_off,
2038 size_t dwords)
2040 size_t qsz = ((1 << q->max_n_shift) * dwords) << 3;
2042 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL);
2043 if (!q->base) {
2044 dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n",
2045 qsz);
2046 return -ENOMEM;
2049 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
2050 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
2051 q->ent_dwords = dwords;
2053 q->q_base = Q_BASE_RWA;
2054 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2055 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
2057 q->prod = q->cons = 0;
2058 return 0;
2061 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2063 int ret;
2065 /* cmdq */
2066 spin_lock_init(&smmu->cmdq.lock);
2067 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
2068 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS);
2069 if (ret)
2070 return ret;
2072 /* evtq */
2073 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
2074 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS);
2075 if (ret)
2076 return ret;
2078 /* priq */
2079 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2080 return 0;
2082 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
2083 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS);
2086 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2088 unsigned int i;
2089 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2090 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
2091 void *strtab = smmu->strtab_cfg.strtab;
2093 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
2094 if (!cfg->l1_desc) {
2095 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
2096 return -ENOMEM;
2099 for (i = 0; i < cfg->num_l1_ents; ++i) {
2100 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2101 strtab += STRTAB_L1_DESC_DWORDS << 3;
2104 return 0;
2107 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2109 void *strtab;
2110 u64 reg;
2111 u32 size, l1size;
2112 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2114 /* Calculate the L1 size, capped to the SIDSIZE. */
2115 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2116 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
2117 cfg->num_l1_ents = 1 << size;
2119 size += STRTAB_SPLIT;
2120 if (size < smmu->sid_bits)
2121 dev_warn(smmu->dev,
2122 "2-level strtab only covers %u/%u bits of SID\n",
2123 size, smmu->sid_bits);
2125 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
2126 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
2127 GFP_KERNEL | __GFP_ZERO);
2128 if (!strtab) {
2129 dev_err(smmu->dev,
2130 "failed to allocate l1 stream table (%u bytes)\n",
2131 size);
2132 return -ENOMEM;
2134 cfg->strtab = strtab;
2136 /* Configure strtab_base_cfg for 2 levels */
2137 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
2138 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
2139 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
2140 cfg->strtab_base_cfg = reg;
2142 return arm_smmu_init_l1_strtab(smmu);
2145 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
2147 void *strtab;
2148 u64 reg;
2149 u32 size;
2150 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2152 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
2153 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
2154 GFP_KERNEL | __GFP_ZERO);
2155 if (!strtab) {
2156 dev_err(smmu->dev,
2157 "failed to allocate linear stream table (%u bytes)\n",
2158 size);
2159 return -ENOMEM;
2161 cfg->strtab = strtab;
2162 cfg->num_l1_ents = 1 << smmu->sid_bits;
2164 /* Configure strtab_base_cfg for a linear table covering all SIDs */
2165 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
2166 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
2167 cfg->strtab_base_cfg = reg;
2169 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
2170 return 0;
2173 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
2175 u64 reg;
2176 int ret;
2178 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2179 ret = arm_smmu_init_strtab_2lvl(smmu);
2180 else
2181 ret = arm_smmu_init_strtab_linear(smmu);
2183 if (ret)
2184 return ret;
2186 /* Set the strtab base address */
2187 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
2188 reg |= STRTAB_BASE_RA;
2189 smmu->strtab_cfg.strtab_base = reg;
2191 /* Allocate the first VMID for stage-2 bypass STEs */
2192 set_bit(0, smmu->vmid_map);
2193 return 0;
/*
 * Initialise the queues and then the stream table.  Returns 0 or the
 * first error encountered.
 */
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
	int err = arm_smmu_init_queues(smmu);

	if (!err)
		err = arm_smmu_init_strtab(smmu);

	return err;
}
2207 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
2208 unsigned int reg_off, unsigned int ack_off)
2210 u32 reg;
2212 writel_relaxed(val, smmu->base + reg_off);
2213 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
2214 1, ARM_SMMU_POLL_TIMEOUT_US);
2217 /* GBPA is "special" */
2218 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
2220 int ret;
2221 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
2223 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2224 1, ARM_SMMU_POLL_TIMEOUT_US);
2225 if (ret)
2226 return ret;
2228 reg &= ~clr;
2229 reg |= set;
2230 writel_relaxed(reg | GBPA_UPDATE, gbpa);
2231 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
2232 1, ARM_SMMU_POLL_TIMEOUT_US);
2234 if (ret)
2235 dev_err(smmu->dev, "GBPA not responding to update\n");
2236 return ret;
/* devm action: free the platform MSIs of the device passed as @data. */
static void arm_smmu_free_msis(void *data)
{
	platform_msi_domain_free_irqs((struct device *)data);
}
2245 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
2247 phys_addr_t doorbell;
2248 struct device *dev = msi_desc_to_dev(desc);
2249 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2250 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
2252 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
2253 doorbell &= MSI_CFG0_ADDR_MASK;
2255 writeq_relaxed(doorbell, smmu->base + cfg[0]);
2256 writel_relaxed(msg->data, smmu->base + cfg[1]);
2257 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
2260 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
2262 struct msi_desc *desc;
2263 int ret, nvec = ARM_SMMU_MAX_MSIS;
2264 struct device *dev = smmu->dev;
2266 /* Clear the MSI address regs */
2267 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
2268 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
2270 if (smmu->features & ARM_SMMU_FEAT_PRI)
2271 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
2272 else
2273 nvec--;
2275 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
2276 return;
2278 if (!dev->msi_domain) {
2279 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
2280 return;
2283 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
2284 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
2285 if (ret) {
2286 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
2287 return;
2290 for_each_msi_entry(desc, dev) {
2291 switch (desc->platform.msi_index) {
2292 case EVTQ_MSI_INDEX:
2293 smmu->evtq.q.irq = desc->irq;
2294 break;
2295 case GERROR_MSI_INDEX:
2296 smmu->gerr_irq = desc->irq;
2297 break;
2298 case PRIQ_MSI_INDEX:
2299 smmu->priq.q.irq = desc->irq;
2300 break;
2301 default: /* Unknown */
2302 continue;
2306 /* Add callback to free MSIs on teardown */
2307 devm_add_action(dev, arm_smmu_free_msis, dev);
2310 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
2312 int irq, ret;
2314 arm_smmu_setup_msis(smmu);
2316 /* Request interrupt lines */
2317 irq = smmu->evtq.q.irq;
2318 if (irq) {
2319 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2320 arm_smmu_evtq_thread,
2321 IRQF_ONESHOT,
2322 "arm-smmu-v3-evtq", smmu);
2323 if (ret < 0)
2324 dev_warn(smmu->dev, "failed to enable evtq irq\n");
2325 } else {
2326 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
2329 irq = smmu->gerr_irq;
2330 if (irq) {
2331 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
2332 0, "arm-smmu-v3-gerror", smmu);
2333 if (ret < 0)
2334 dev_warn(smmu->dev, "failed to enable gerror irq\n");
2335 } else {
2336 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
2339 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2340 irq = smmu->priq.q.irq;
2341 if (irq) {
2342 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
2343 arm_smmu_priq_thread,
2344 IRQF_ONESHOT,
2345 "arm-smmu-v3-priq",
2346 smmu);
2347 if (ret < 0)
2348 dev_warn(smmu->dev,
2349 "failed to enable priq irq\n");
2350 } else {
2351 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
2356 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
2358 int ret, irq;
2359 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
2361 /* Disable IRQs first */
2362 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
2363 ARM_SMMU_IRQ_CTRLACK);
2364 if (ret) {
2365 dev_err(smmu->dev, "failed to disable irqs\n");
2366 return ret;
2369 irq = smmu->combined_irq;
2370 if (irq) {
2372 * Cavium ThunderX2 implementation doesn't not support unique
2373 * irq lines. Use single irq line for all the SMMUv3 interrupts.
2375 ret = devm_request_threaded_irq(smmu->dev, irq,
2376 arm_smmu_combined_irq_handler,
2377 arm_smmu_combined_irq_thread,
2378 IRQF_ONESHOT,
2379 "arm-smmu-v3-combined-irq", smmu);
2380 if (ret < 0)
2381 dev_warn(smmu->dev, "failed to enable combined irq\n");
2382 } else
2383 arm_smmu_setup_unique_irqs(smmu);
2385 if (smmu->features & ARM_SMMU_FEAT_PRI)
2386 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
2388 /* Enable interrupt generation on the SMMU */
2389 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
2390 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
2391 if (ret)
2392 dev_warn(smmu->dev, "failed to enable irqs\n");
2394 return 0;
2397 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
2399 int ret;
2401 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
2402 if (ret)
2403 dev_err(smmu->dev, "failed to clear cr0\n");
2405 return ret;
2408 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
2410 int ret;
2411 u32 reg, enables;
2412 struct arm_smmu_cmdq_ent cmd;
2414 /* Clear CR0 and sync (disables SMMU and queue processing) */
2415 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
2416 if (reg & CR0_SMMUEN) {
2417 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
2418 WARN_ON(is_kdump_kernel() && !disable_bypass);
2419 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
2422 ret = arm_smmu_device_disable(smmu);
2423 if (ret)
2424 return ret;
2426 /* CR1 (table and queue memory attributes) */
2427 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
2428 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
2429 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
2430 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
2431 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
2432 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
2433 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
2435 /* CR2 (random crap) */
2436 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
2437 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
2439 /* Stream table */
2440 writeq_relaxed(smmu->strtab_cfg.strtab_base,
2441 smmu->base + ARM_SMMU_STRTAB_BASE);
2442 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
2443 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
2445 /* Command queue */
2446 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
2447 writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
2448 writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
2450 enables = CR0_CMDQEN;
2451 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2452 ARM_SMMU_CR0ACK);
2453 if (ret) {
2454 dev_err(smmu->dev, "failed to enable command queue\n");
2455 return ret;
2458 /* Invalidate any cached configuration */
2459 cmd.opcode = CMDQ_OP_CFGI_ALL;
2460 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2461 arm_smmu_cmdq_issue_sync(smmu);
2463 /* Invalidate any stale TLB entries */
2464 if (smmu->features & ARM_SMMU_FEAT_HYP) {
2465 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
2466 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2469 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
2470 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2471 arm_smmu_cmdq_issue_sync(smmu);
2473 /* Event queue */
2474 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
2475 writel_relaxed(smmu->evtq.q.prod,
2476 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
2477 writel_relaxed(smmu->evtq.q.cons,
2478 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
2480 enables |= CR0_EVTQEN;
2481 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2482 ARM_SMMU_CR0ACK);
2483 if (ret) {
2484 dev_err(smmu->dev, "failed to enable event queue\n");
2485 return ret;
2488 /* PRI queue */
2489 if (smmu->features & ARM_SMMU_FEAT_PRI) {
2490 writeq_relaxed(smmu->priq.q.q_base,
2491 smmu->base + ARM_SMMU_PRIQ_BASE);
2492 writel_relaxed(smmu->priq.q.prod,
2493 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
2494 writel_relaxed(smmu->priq.q.cons,
2495 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
2497 enables |= CR0_PRIQEN;
2498 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2499 ARM_SMMU_CR0ACK);
2500 if (ret) {
2501 dev_err(smmu->dev, "failed to enable PRI queue\n");
2502 return ret;
2506 ret = arm_smmu_setup_irqs(smmu);
2507 if (ret) {
2508 dev_err(smmu->dev, "failed to setup irqs\n");
2509 return ret;
2512 if (is_kdump_kernel())
2513 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
2515 /* Enable the SMMU interface, or ensure bypass */
2516 if (!bypass || disable_bypass) {
2517 enables |= CR0_SMMUEN;
2518 } else {
2519 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
2520 if (ret)
2521 return ret;
2523 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
2524 ARM_SMMU_CR0ACK);
2525 if (ret) {
2526 dev_err(smmu->dev, "failed to enable SMMU interface\n");
2527 return ret;
2530 return 0;
2533 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
2535 u32 reg;
2536 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
2538 /* IDR0 */
2539 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
2541 /* 2-level structures */
2542 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
2543 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
2545 if (reg & IDR0_CD2L)
2546 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
2549 * Translation table endianness.
2550 * We currently require the same endianness as the CPU, but this
2551 * could be changed later by adding a new IO_PGTABLE_QUIRK.
2553 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
2554 case IDR0_TTENDIAN_MIXED:
2555 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
2556 break;
2557 #ifdef __BIG_ENDIAN
2558 case IDR0_TTENDIAN_BE:
2559 smmu->features |= ARM_SMMU_FEAT_TT_BE;
2560 break;
2561 #else
2562 case IDR0_TTENDIAN_LE:
2563 smmu->features |= ARM_SMMU_FEAT_TT_LE;
2564 break;
2565 #endif
2566 default:
2567 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
2568 return -ENXIO;
2571 /* Boolean feature flags */
2572 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
2573 smmu->features |= ARM_SMMU_FEAT_PRI;
2575 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
2576 smmu->features |= ARM_SMMU_FEAT_ATS;
2578 if (reg & IDR0_SEV)
2579 smmu->features |= ARM_SMMU_FEAT_SEV;
2581 if (reg & IDR0_MSI)
2582 smmu->features |= ARM_SMMU_FEAT_MSI;
2584 if (reg & IDR0_HYP)
2585 smmu->features |= ARM_SMMU_FEAT_HYP;
2588 * The coherency feature as set by FW is used in preference to the ID
2589 * register, but warn on mismatch.
2591 if (!!(reg & IDR0_COHACC) != coherent)
2592 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
2593 coherent ? "true" : "false");
2595 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
2596 case IDR0_STALL_MODEL_FORCE:
2597 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
2598 /* Fallthrough */
2599 case IDR0_STALL_MODEL_STALL:
2600 smmu->features |= ARM_SMMU_FEAT_STALLS;
2603 if (reg & IDR0_S1P)
2604 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
2606 if (reg & IDR0_S2P)
2607 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
2609 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
2610 dev_err(smmu->dev, "no translation support!\n");
2611 return -ENXIO;
2614 /* We only support the AArch64 table format at present */
2615 switch (FIELD_GET(IDR0_TTF, reg)) {
2616 case IDR0_TTF_AARCH32_64:
2617 smmu->ias = 40;
2618 /* Fallthrough */
2619 case IDR0_TTF_AARCH64:
2620 break;
2621 default:
2622 dev_err(smmu->dev, "AArch64 table format not supported!\n");
2623 return -ENXIO;
2626 /* ASID/VMID sizes */
2627 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
2628 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
2630 /* IDR1 */
2631 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
2632 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
2633 dev_err(smmu->dev, "embedded implementation not supported\n");
2634 return -ENXIO;
2637 /* Queue sizes, capped at 4k */
2638 smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
2639 FIELD_GET(IDR1_CMDQS, reg));
2640 if (!smmu->cmdq.q.max_n_shift) {
2641 /* Odd alignment restrictions on the base, so ignore for now */
2642 dev_err(smmu->dev, "unit-length command queue not supported\n");
2643 return -ENXIO;
2646 smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
2647 FIELD_GET(IDR1_EVTQS, reg));
2648 smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
2649 FIELD_GET(IDR1_PRIQS, reg));
2651 /* SID/SSID sizes */
2652 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
2653 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
2656 * If the SMMU supports fewer bits than would fill a single L2 stream
2657 * table, use a linear table instead.
2659 if (smmu->sid_bits <= STRTAB_SPLIT)
2660 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
2662 /* IDR5 */
2663 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
2665 /* Maximum number of outstanding stalls */
2666 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
2668 /* Page sizes */
2669 if (reg & IDR5_GRAN64K)
2670 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
2671 if (reg & IDR5_GRAN16K)
2672 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
2673 if (reg & IDR5_GRAN4K)
2674 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
2676 /* Input address size */
2677 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
2678 smmu->features |= ARM_SMMU_FEAT_VAX;
2680 /* Output address size */
2681 switch (FIELD_GET(IDR5_OAS, reg)) {
2682 case IDR5_OAS_32_BIT:
2683 smmu->oas = 32;
2684 break;
2685 case IDR5_OAS_36_BIT:
2686 smmu->oas = 36;
2687 break;
2688 case IDR5_OAS_40_BIT:
2689 smmu->oas = 40;
2690 break;
2691 case IDR5_OAS_42_BIT:
2692 smmu->oas = 42;
2693 break;
2694 case IDR5_OAS_44_BIT:
2695 smmu->oas = 44;
2696 break;
2697 case IDR5_OAS_52_BIT:
2698 smmu->oas = 52;
2699 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
2700 break;
2701 default:
2702 dev_info(smmu->dev,
2703 "unknown output address size. Truncating to 48-bit\n");
2704 /* Fallthrough */
2705 case IDR5_OAS_48_BIT:
2706 smmu->oas = 48;
2709 if (arm_smmu_ops.pgsize_bitmap == -1UL)
2710 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
2711 else
2712 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
2714 /* Set the DMA mask for our table walker */
2715 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
2716 dev_warn(smmu->dev,
2717 "failed to set DMA mask for table walker\n");
2719 smmu->ias = max(smmu->ias, smmu->oas);
2721 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
2722 smmu->ias, smmu->oas, smmu->features);
2723 return 0;
2726 #ifdef CONFIG_ACPI
2727 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
2729 switch (model) {
2730 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
2731 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
2732 break;
2733 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
2734 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
2735 break;
2738 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
2741 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2742 struct arm_smmu_device *smmu)
2744 struct acpi_iort_smmu_v3 *iort_smmu;
2745 struct device *dev = smmu->dev;
2746 struct acpi_iort_node *node;
2748 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
2750 /* Retrieve SMMUv3 specific data */
2751 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
2753 acpi_smmu_get_options(iort_smmu->model, smmu);
2755 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
2756 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2758 return 0;
2760 #else
2761 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
2762 struct arm_smmu_device *smmu)
2764 return -ENODEV;
2766 #endif
2768 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
2769 struct arm_smmu_device *smmu)
2771 struct device *dev = &pdev->dev;
2772 u32 cells;
2773 int ret = -EINVAL;
2775 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
2776 dev_err(dev, "missing #iommu-cells property\n");
2777 else if (cells != 1)
2778 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
2779 else
2780 ret = 0;
2782 parse_driver_options(smmu);
2784 if (of_dma_is_coherent(dev->of_node))
2785 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
2787 return ret;
2790 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
2792 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
2793 return SZ_64K;
2794 else
2795 return SZ_128K;
2798 static int arm_smmu_device_probe(struct platform_device *pdev)
2800 int irq, ret;
2801 struct resource *res;
2802 resource_size_t ioaddr;
2803 struct arm_smmu_device *smmu;
2804 struct device *dev = &pdev->dev;
2805 bool bypass;
2807 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2808 if (!smmu) {
2809 dev_err(dev, "failed to allocate arm_smmu_device\n");
2810 return -ENOMEM;
2812 smmu->dev = dev;
2814 if (dev->of_node) {
2815 ret = arm_smmu_device_dt_probe(pdev, smmu);
2816 } else {
2817 ret = arm_smmu_device_acpi_probe(pdev, smmu);
2818 if (ret == -ENODEV)
2819 return ret;
2822 /* Set bypass mode according to firmware probing result */
2823 bypass = !!ret;
2825 /* Base address */
2826 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2827 if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
2828 dev_err(dev, "MMIO region too small (%pr)\n", res);
2829 return -EINVAL;
2831 ioaddr = res->start;
2833 smmu->base = devm_ioremap_resource(dev, res);
2834 if (IS_ERR(smmu->base))
2835 return PTR_ERR(smmu->base);
2837 /* Interrupt lines */
2839 irq = platform_get_irq_byname(pdev, "combined");
2840 if (irq > 0)
2841 smmu->combined_irq = irq;
2842 else {
2843 irq = platform_get_irq_byname(pdev, "eventq");
2844 if (irq > 0)
2845 smmu->evtq.q.irq = irq;
2847 irq = platform_get_irq_byname(pdev, "priq");
2848 if (irq > 0)
2849 smmu->priq.q.irq = irq;
2851 irq = platform_get_irq_byname(pdev, "gerror");
2852 if (irq > 0)
2853 smmu->gerr_irq = irq;
2855 /* Probe the h/w */
2856 ret = arm_smmu_device_hw_probe(smmu);
2857 if (ret)
2858 return ret;
2860 /* Initialise in-memory data structures */
2861 ret = arm_smmu_init_structures(smmu);
2862 if (ret)
2863 return ret;
2865 /* Record our private device structure */
2866 platform_set_drvdata(pdev, smmu);
2868 /* Reset the device */
2869 ret = arm_smmu_device_reset(smmu, bypass);
2870 if (ret)
2871 return ret;
2873 /* And we're up. Go go go! */
2874 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
2875 "smmu3.%pa", &ioaddr);
2876 if (ret)
2877 return ret;
2879 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
2880 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
2882 ret = iommu_device_register(&smmu->iommu);
2883 if (ret) {
2884 dev_err(dev, "Failed to register iommu\n");
2885 return ret;
2888 #ifdef CONFIG_PCI
2889 if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
2890 pci_request_acs();
2891 ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
2892 if (ret)
2893 return ret;
2895 #endif
2896 #ifdef CONFIG_ARM_AMBA
2897 if (amba_bustype.iommu_ops != &arm_smmu_ops) {
2898 ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
2899 if (ret)
2900 return ret;
2902 #endif
2903 if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
2904 ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
2905 if (ret)
2906 return ret;
2908 return 0;
/*
 * Platform driver remove: disable the SMMU through
 * arm_smmu_device_disable(). Its result is ignored; always returns 0.
 */
static int arm_smmu_device_remove(struct platform_device *pdev)
{
	arm_smmu_device_disable(platform_get_drvdata(pdev));

	return 0;
}
/* Shutdown does exactly what remove does; the return value is dropped. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	(void)arm_smmu_device_remove(pdev);
}
2925 static const struct of_device_id arm_smmu_of_match[] = {
2926 { .compatible = "arm,smmu-v3", },
2927 { },
2929 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
2931 static struct platform_driver arm_smmu_driver = {
2932 .driver = {
2933 .name = "arm-smmu-v3",
2934 .of_match_table = of_match_ptr(arm_smmu_of_match),
2936 .probe = arm_smmu_device_probe,
2937 .remove = arm_smmu_device_remove,
2938 .shutdown = arm_smmu_device_shutdown,
2940 module_platform_driver(arm_smmu_driver);
2942 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
2943 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
2944 MODULE_LICENSE("GPL v2");