drivers/iommu/arm-smmu-v3.c
// SPDX-License-Identifier: GPL-2.0
/*
 * IOMMU API for ARM architected SMMUv3 implementations.
 *
 * Copyright (C) 2015 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver is powered by bad coffee and bombay mix.
 */
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitfield.h>
15 #include <linux/bitops.h>
16 #include <linux/crash_dump.h>
17 #include <linux/delay.h>
18 #include <linux/dma-iommu.h>
19 #include <linux/err.h>
20 #include <linux/interrupt.h>
21 #include <linux/io-pgtable.h>
22 #include <linux/iommu.h>
23 #include <linux/iopoll.h>
24 #include <linux/module.h>
25 #include <linux/msi.h>
26 #include <linux/of.h>
27 #include <linux/of_address.h>
28 #include <linux/of_iommu.h>
29 #include <linux/of_platform.h>
30 #include <linux/pci.h>
31 #include <linux/pci-ats.h>
32 #include <linux/platform_device.h>
34 #include <linux/amba/bus.h>
36 /* MMIO registers */
37 #define ARM_SMMU_IDR0 0x0
38 #define IDR0_ST_LVL GENMASK(28, 27)
39 #define IDR0_ST_LVL_2LVL 1
40 #define IDR0_STALL_MODEL GENMASK(25, 24)
41 #define IDR0_STALL_MODEL_STALL 0
42 #define IDR0_STALL_MODEL_FORCE 2
43 #define IDR0_TTENDIAN GENMASK(22, 21)
44 #define IDR0_TTENDIAN_MIXED 0
45 #define IDR0_TTENDIAN_LE 2
46 #define IDR0_TTENDIAN_BE 3
47 #define IDR0_CD2L (1 << 19)
48 #define IDR0_VMID16 (1 << 18)
49 #define IDR0_PRI (1 << 16)
50 #define IDR0_SEV (1 << 14)
51 #define IDR0_MSI (1 << 13)
52 #define IDR0_ASID16 (1 << 12)
53 #define IDR0_ATS (1 << 10)
54 #define IDR0_HYP (1 << 9)
55 #define IDR0_COHACC (1 << 4)
56 #define IDR0_TTF GENMASK(3, 2)
57 #define IDR0_TTF_AARCH64 2
58 #define IDR0_TTF_AARCH32_64 3
59 #define IDR0_S1P (1 << 1)
60 #define IDR0_S2P (1 << 0)
62 #define ARM_SMMU_IDR1 0x4
63 #define IDR1_TABLES_PRESET (1 << 30)
64 #define IDR1_QUEUES_PRESET (1 << 29)
65 #define IDR1_REL (1 << 28)
66 #define IDR1_CMDQS GENMASK(25, 21)
67 #define IDR1_EVTQS GENMASK(20, 16)
68 #define IDR1_PRIQS GENMASK(15, 11)
69 #define IDR1_SSIDSIZE GENMASK(10, 6)
70 #define IDR1_SIDSIZE GENMASK(5, 0)
72 #define ARM_SMMU_IDR3 0xc
73 #define IDR3_RIL (1 << 10)
75 #define ARM_SMMU_IDR5 0x14
76 #define IDR5_STALL_MAX GENMASK(31, 16)
77 #define IDR5_GRAN64K (1 << 6)
78 #define IDR5_GRAN16K (1 << 5)
79 #define IDR5_GRAN4K (1 << 4)
80 #define IDR5_OAS GENMASK(2, 0)
81 #define IDR5_OAS_32_BIT 0
82 #define IDR5_OAS_36_BIT 1
83 #define IDR5_OAS_40_BIT 2
84 #define IDR5_OAS_42_BIT 3
85 #define IDR5_OAS_44_BIT 4
86 #define IDR5_OAS_48_BIT 5
87 #define IDR5_OAS_52_BIT 6
88 #define IDR5_VAX GENMASK(11, 10)
89 #define IDR5_VAX_52_BIT 1
91 #define ARM_SMMU_CR0 0x20
92 #define CR0_ATSCHK (1 << 4)
93 #define CR0_CMDQEN (1 << 3)
94 #define CR0_EVTQEN (1 << 2)
95 #define CR0_PRIQEN (1 << 1)
96 #define CR0_SMMUEN (1 << 0)
98 #define ARM_SMMU_CR0ACK 0x24
100 #define ARM_SMMU_CR1 0x28
101 #define CR1_TABLE_SH GENMASK(11, 10)
102 #define CR1_TABLE_OC GENMASK(9, 8)
103 #define CR1_TABLE_IC GENMASK(7, 6)
104 #define CR1_QUEUE_SH GENMASK(5, 4)
105 #define CR1_QUEUE_OC GENMASK(3, 2)
106 #define CR1_QUEUE_IC GENMASK(1, 0)
107 /* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
108 #define CR1_CACHE_NC 0
109 #define CR1_CACHE_WB 1
110 #define CR1_CACHE_WT 2
112 #define ARM_SMMU_CR2 0x2c
113 #define CR2_PTM (1 << 2)
114 #define CR2_RECINVSID (1 << 1)
115 #define CR2_E2H (1 << 0)
117 #define ARM_SMMU_GBPA 0x44
118 #define GBPA_UPDATE (1 << 31)
119 #define GBPA_ABORT (1 << 20)
121 #define ARM_SMMU_IRQ_CTRL 0x50
122 #define IRQ_CTRL_EVTQ_IRQEN (1 << 2)
123 #define IRQ_CTRL_PRIQ_IRQEN (1 << 1)
124 #define IRQ_CTRL_GERROR_IRQEN (1 << 0)
126 #define ARM_SMMU_IRQ_CTRLACK 0x54
128 #define ARM_SMMU_GERROR 0x60
129 #define GERROR_SFM_ERR (1 << 8)
130 #define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
131 #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
132 #define GERROR_MSI_EVTQ_ABT_ERR (1 << 5)
133 #define GERROR_MSI_CMDQ_ABT_ERR (1 << 4)
134 #define GERROR_PRIQ_ABT_ERR (1 << 3)
135 #define GERROR_EVTQ_ABT_ERR (1 << 2)
136 #define GERROR_CMDQ_ERR (1 << 0)
137 #define GERROR_ERR_MASK 0xfd
139 #define ARM_SMMU_GERRORN 0x64
141 #define ARM_SMMU_GERROR_IRQ_CFG0 0x68
142 #define ARM_SMMU_GERROR_IRQ_CFG1 0x70
143 #define ARM_SMMU_GERROR_IRQ_CFG2 0x74
145 #define ARM_SMMU_STRTAB_BASE 0x80
146 #define STRTAB_BASE_RA (1UL << 62)
147 #define STRTAB_BASE_ADDR_MASK GENMASK_ULL(51, 6)
149 #define ARM_SMMU_STRTAB_BASE_CFG 0x88
150 #define STRTAB_BASE_CFG_FMT GENMASK(17, 16)
151 #define STRTAB_BASE_CFG_FMT_LINEAR 0
152 #define STRTAB_BASE_CFG_FMT_2LVL 1
153 #define STRTAB_BASE_CFG_SPLIT GENMASK(10, 6)
154 #define STRTAB_BASE_CFG_LOG2SIZE GENMASK(5, 0)
156 #define ARM_SMMU_CMDQ_BASE 0x90
157 #define ARM_SMMU_CMDQ_PROD 0x98
158 #define ARM_SMMU_CMDQ_CONS 0x9c
160 #define ARM_SMMU_EVTQ_BASE 0xa0
161 #define ARM_SMMU_EVTQ_PROD 0x100a8
162 #define ARM_SMMU_EVTQ_CONS 0x100ac
163 #define ARM_SMMU_EVTQ_IRQ_CFG0 0xb0
164 #define ARM_SMMU_EVTQ_IRQ_CFG1 0xb8
165 #define ARM_SMMU_EVTQ_IRQ_CFG2 0xbc
167 #define ARM_SMMU_PRIQ_BASE 0xc0
168 #define ARM_SMMU_PRIQ_PROD 0x100c8
169 #define ARM_SMMU_PRIQ_CONS 0x100cc
170 #define ARM_SMMU_PRIQ_IRQ_CFG0 0xd0
171 #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
172 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
174 #define ARM_SMMU_REG_SZ 0xe00
176 /* Common MSI config fields */
177 #define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2)
178 #define MSI_CFG2_SH GENMASK(5, 4)
179 #define MSI_CFG2_MEMATTR GENMASK(3, 0)
181 /* Common memory attribute values */
182 #define ARM_SMMU_SH_NSH 0
183 #define ARM_SMMU_SH_OSH 2
184 #define ARM_SMMU_SH_ISH 3
185 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE 0x1
186 #define ARM_SMMU_MEMATTR_OIWB 0xf
188 #define Q_IDX(llq, p) ((p) & ((1 << (llq)->max_n_shift) - 1))
189 #define Q_WRP(llq, p) ((p) & (1 << (llq)->max_n_shift))
190 #define Q_OVERFLOW_FLAG (1U << 31)
191 #define Q_OVF(p) ((p) & Q_OVERFLOW_FLAG)
192 #define Q_ENT(q, p) ((q)->base + \
193 Q_IDX(&((q)->llq), p) * \
194 (q)->ent_dwords)
196 #define Q_BASE_RWA (1UL << 62)
197 #define Q_BASE_ADDR_MASK GENMASK_ULL(51, 5)
198 #define Q_BASE_LOG2SIZE GENMASK(4, 0)
200 /* Ensure DMA allocations are naturally aligned */
201 #ifdef CONFIG_CMA_ALIGNMENT
202 #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + CONFIG_CMA_ALIGNMENT)
203 #else
204 #define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
205 #endif
208 * Stream table.
210 * Linear: Enough to cover 1 << IDR1.SIDSIZE entries
211 * 2lvl: 128k L1 entries,
212 * 256 lazy entries per table (each table covers a PCI bus)
214 #define STRTAB_L1_SZ_SHIFT 20
215 #define STRTAB_SPLIT 8
217 #define STRTAB_L1_DESC_DWORDS 1
218 #define STRTAB_L1_DESC_SPAN GENMASK_ULL(4, 0)
219 #define STRTAB_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 6)
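/*
 * Example (illustrative only, assuming the usual 1:1 PCI RID to StreamID
 * mapping): with STRTAB_SPLIT == 8, StreamID 0x0812 (PCI 08:02.2) selects
 * L1 descriptor 0x0812 >> 8 = 8 and STE 0x0812 & 0xff = 0x12 within that
 * 256-entry leaf table, which is why a single lazily-allocated leaf covers
 * exactly one PCI bus.
 */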
221 #define STRTAB_STE_DWORDS 8
222 #define STRTAB_STE_0_V (1UL << 0)
223 #define STRTAB_STE_0_CFG GENMASK_ULL(3, 1)
224 #define STRTAB_STE_0_CFG_ABORT 0
225 #define STRTAB_STE_0_CFG_BYPASS 4
226 #define STRTAB_STE_0_CFG_S1_TRANS 5
227 #define STRTAB_STE_0_CFG_S2_TRANS 6
229 #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
230 #define STRTAB_STE_0_S1FMT_LINEAR 0
231 #define STRTAB_STE_0_S1FMT_64K_L2 2
232 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6)
233 #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59)
235 #define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0)
236 #define STRTAB_STE_1_S1DSS_TERMINATE 0x0
237 #define STRTAB_STE_1_S1DSS_BYPASS 0x1
238 #define STRTAB_STE_1_S1DSS_SSID0 0x2
240 #define STRTAB_STE_1_S1C_CACHE_NC 0UL
241 #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL
242 #define STRTAB_STE_1_S1C_CACHE_WT 2UL
243 #define STRTAB_STE_1_S1C_CACHE_WB 3UL
244 #define STRTAB_STE_1_S1CIR GENMASK_ULL(3, 2)
245 #define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
246 #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
248 #define STRTAB_STE_1_S1STALLD (1UL << 27)
250 #define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
251 #define STRTAB_STE_1_EATS_ABT 0UL
252 #define STRTAB_STE_1_EATS_TRANS 1UL
253 #define STRTAB_STE_1_EATS_S1CHK 2UL
255 #define STRTAB_STE_1_STRW GENMASK_ULL(31, 30)
256 #define STRTAB_STE_1_STRW_NSEL1 0UL
257 #define STRTAB_STE_1_STRW_EL2 2UL
259 #define STRTAB_STE_1_SHCFG GENMASK_ULL(45, 44)
260 #define STRTAB_STE_1_SHCFG_INCOMING 1UL
262 #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0)
263 #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32)
264 #define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0)
265 #define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6)
266 #define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8)
267 #define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10)
268 #define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12)
269 #define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14)
270 #define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16)
271 #define STRTAB_STE_2_S2AA64 (1UL << 51)
272 #define STRTAB_STE_2_S2ENDI (1UL << 52)
273 #define STRTAB_STE_2_S2PTW (1UL << 54)
274 #define STRTAB_STE_2_S2R (1UL << 58)
276 #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
279 * Context descriptors.
281 * Linear: when less than 1024 SSIDs are supported
282 * 2lvl: at most 1024 L1 entries,
283 * 1024 lazy entries per table.
285 #define CTXDESC_SPLIT 10
286 #define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT)
288 #define CTXDESC_L1_DESC_DWORDS 1
289 #define CTXDESC_L1_DESC_V (1UL << 0)
290 #define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12)
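/*
 * Example (illustrative only): with CTXDESC_SPLIT == 10, an incoming SSID
 * of 0x1234 selects L1 descriptor 0x1234 >> 10 = 4 and entry
 * 0x1234 & (CTXDESC_L2_ENTRIES - 1) = 0x234 within the corresponding leaf
 * table, mirroring the lookup performed by arm_smmu_get_cd_ptr().
 */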
292 #define CTXDESC_CD_DWORDS 8
293 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0)
294 #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6)
295 #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8)
296 #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10)
297 #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12)
298 #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14)
299 #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30)
301 #define CTXDESC_CD_0_ENDI (1UL << 15)
302 #define CTXDESC_CD_0_V (1UL << 31)
304 #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32)
305 #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38)
307 #define CTXDESC_CD_0_AA64 (1UL << 41)
308 #define CTXDESC_CD_0_S (1UL << 44)
309 #define CTXDESC_CD_0_R (1UL << 45)
310 #define CTXDESC_CD_0_A (1UL << 46)
311 #define CTXDESC_CD_0_ASET (1UL << 47)
312 #define CTXDESC_CD_0_ASID GENMASK_ULL(63, 48)
314 #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
317 * When the SMMU only supports linear context descriptor tables, pick a
318 * reasonable size limit (64kB).
320 #define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3))
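/*
 * Worked example (illustrative): a CD is CTXDESC_CD_DWORDS (8) dwords,
 * i.e. 64 bytes, so the 64kB cap gives SZ_64K / 64 = 1024 entries and
 * CTXDESC_LINEAR_CDMAX evaluates to ilog2(1024) = 10, i.e. exactly one
 * leaf table's worth (CTXDESC_SPLIT).
 */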
322 /* Command queue */
323 #define CMDQ_ENT_SZ_SHIFT 4
324 #define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3)
325 #define CMDQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - CMDQ_ENT_SZ_SHIFT)
327 #define CMDQ_CONS_ERR GENMASK(30, 24)
328 #define CMDQ_ERR_CERROR_NONE_IDX 0
329 #define CMDQ_ERR_CERROR_ILL_IDX 1
330 #define CMDQ_ERR_CERROR_ABT_IDX 2
331 #define CMDQ_ERR_CERROR_ATC_INV_IDX 3
333 #define CMDQ_PROD_OWNED_FLAG Q_OVERFLOW_FLAG
336 * This is used to size the command queue and therefore must be at least
337 * BITS_PER_LONG so that the valid_map works correctly (it relies on the
338 * total number of queue entries being a multiple of BITS_PER_LONG).
340 #define CMDQ_BATCH_ENTRIES BITS_PER_LONG
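/*
 * Since queue sizes are powers of two, any command queue of at least
 * BITS_PER_LONG entries is automatically a whole multiple of BITS_PER_LONG,
 * so each word of the valid_map bitmap covers exactly BITS_PER_LONG
 * consecutive queue slots and there is never a partial word to special-case.
 */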
342 #define CMDQ_0_OP GENMASK_ULL(7, 0)
343 #define CMDQ_0_SSV (1UL << 11)
345 #define CMDQ_PREFETCH_0_SID GENMASK_ULL(63, 32)
346 #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0)
347 #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12)
349 #define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12)
350 #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32)
351 #define CMDQ_CFGI_1_LEAF (1UL << 0)
352 #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0)
354 #define CMDQ_TLBI_0_NUM GENMASK_ULL(16, 12)
355 #define CMDQ_TLBI_RANGE_NUM_MAX 31
356 #define CMDQ_TLBI_0_SCALE GENMASK_ULL(24, 20)
357 #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32)
358 #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48)
359 #define CMDQ_TLBI_1_LEAF (1UL << 0)
360 #define CMDQ_TLBI_1_TTL GENMASK_ULL(9, 8)
361 #define CMDQ_TLBI_1_TG GENMASK_ULL(11, 10)
362 #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12)
363 #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12)
365 #define CMDQ_ATC_0_SSID GENMASK_ULL(31, 12)
366 #define CMDQ_ATC_0_SID GENMASK_ULL(63, 32)
367 #define CMDQ_ATC_0_GLOBAL (1UL << 9)
368 #define CMDQ_ATC_1_SIZE GENMASK_ULL(5, 0)
369 #define CMDQ_ATC_1_ADDR_MASK GENMASK_ULL(63, 12)
371 #define CMDQ_PRI_0_SSID GENMASK_ULL(31, 12)
372 #define CMDQ_PRI_0_SID GENMASK_ULL(63, 32)
373 #define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
374 #define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
376 #define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
377 #define CMDQ_SYNC_0_CS_NONE 0
378 #define CMDQ_SYNC_0_CS_IRQ 1
379 #define CMDQ_SYNC_0_CS_SEV 2
380 #define CMDQ_SYNC_0_MSH GENMASK_ULL(23, 22)
381 #define CMDQ_SYNC_0_MSIATTR GENMASK_ULL(27, 24)
382 #define CMDQ_SYNC_0_MSIDATA GENMASK_ULL(63, 32)
383 #define CMDQ_SYNC_1_MSIADDR_MASK GENMASK_ULL(51, 2)
385 /* Event queue */
386 #define EVTQ_ENT_SZ_SHIFT 5
387 #define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
388 #define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
390 #define EVTQ_0_ID GENMASK_ULL(7, 0)
392 /* PRI queue */
393 #define PRIQ_ENT_SZ_SHIFT 4
394 #define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
395 #define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
397 #define PRIQ_0_SID GENMASK_ULL(31, 0)
398 #define PRIQ_0_SSID GENMASK_ULL(51, 32)
399 #define PRIQ_0_PERM_PRIV (1UL << 58)
400 #define PRIQ_0_PERM_EXEC (1UL << 59)
401 #define PRIQ_0_PERM_READ (1UL << 60)
402 #define PRIQ_0_PERM_WRITE (1UL << 61)
403 #define PRIQ_0_PRG_LAST (1UL << 62)
404 #define PRIQ_0_SSID_V (1UL << 63)
406 #define PRIQ_1_PRG_IDX GENMASK_ULL(8, 0)
407 #define PRIQ_1_ADDR_MASK GENMASK_ULL(63, 12)
409 /* High-level queue structures */
410 #define ARM_SMMU_POLL_TIMEOUT_US 1000000 /* 1s! */
411 #define ARM_SMMU_POLL_SPIN_COUNT 10
413 #define MSI_IOVA_BASE 0x8000000
414 #define MSI_IOVA_LENGTH 0x100000
416 static bool disable_bypass = 1;
417 module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
418 MODULE_PARM_DESC(disable_bypass,
419 "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
421 enum pri_resp {
422 PRI_RESP_DENY = 0,
423 PRI_RESP_FAIL = 1,
424 PRI_RESP_SUCC = 2,
427 enum arm_smmu_msi_index {
428 EVTQ_MSI_INDEX,
429 GERROR_MSI_INDEX,
430 PRIQ_MSI_INDEX,
431 ARM_SMMU_MAX_MSIS,
434 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
435 [EVTQ_MSI_INDEX] = {
436 ARM_SMMU_EVTQ_IRQ_CFG0,
437 ARM_SMMU_EVTQ_IRQ_CFG1,
438 ARM_SMMU_EVTQ_IRQ_CFG2,
440 [GERROR_MSI_INDEX] = {
441 ARM_SMMU_GERROR_IRQ_CFG0,
442 ARM_SMMU_GERROR_IRQ_CFG1,
443 ARM_SMMU_GERROR_IRQ_CFG2,
445 [PRIQ_MSI_INDEX] = {
446 ARM_SMMU_PRIQ_IRQ_CFG0,
447 ARM_SMMU_PRIQ_IRQ_CFG1,
448 ARM_SMMU_PRIQ_IRQ_CFG2,
452 struct arm_smmu_cmdq_ent {
453 /* Common fields */
454 u8 opcode;
455 bool substream_valid;
457 /* Command-specific fields */
458 union {
459 #define CMDQ_OP_PREFETCH_CFG 0x1
460 struct {
461 u32 sid;
462 u8 size;
463 u64 addr;
464 } prefetch;
466 #define CMDQ_OP_CFGI_STE 0x3
467 #define CMDQ_OP_CFGI_ALL 0x4
468 #define CMDQ_OP_CFGI_CD 0x5
469 #define CMDQ_OP_CFGI_CD_ALL 0x6
470 struct {
471 u32 sid;
472 u32 ssid;
473 union {
474 bool leaf;
475 u8 span;
477 } cfgi;
479 #define CMDQ_OP_TLBI_NH_ASID 0x11
480 #define CMDQ_OP_TLBI_NH_VA 0x12
481 #define CMDQ_OP_TLBI_EL2_ALL 0x20
482 #define CMDQ_OP_TLBI_S12_VMALL 0x28
483 #define CMDQ_OP_TLBI_S2_IPA 0x2a
484 #define CMDQ_OP_TLBI_NSNH_ALL 0x30
485 struct {
486 u8 num;
487 u8 scale;
488 u16 asid;
489 u16 vmid;
490 bool leaf;
491 u8 ttl;
492 u8 tg;
493 u64 addr;
494 } tlbi;
496 #define CMDQ_OP_ATC_INV 0x40
497 #define ATC_INV_SIZE_ALL 52
498 struct {
499 u32 sid;
500 u32 ssid;
501 u64 addr;
502 u8 size;
503 bool global;
504 } atc;
506 #define CMDQ_OP_PRI_RESP 0x41
507 struct {
508 u32 sid;
509 u32 ssid;
510 u16 grpid;
511 enum pri_resp resp;
512 } pri;
514 #define CMDQ_OP_CMD_SYNC 0x46
515 struct {
516 u64 msiaddr;
517 } sync;
521 struct arm_smmu_ll_queue {
522 union {
523 u64 val;
524 struct {
525 u32 prod;
526 u32 cons;
528 struct {
529 atomic_t prod;
530 atomic_t cons;
531 } atomic;
532 u8 __pad[SMP_CACHE_BYTES];
533 } ____cacheline_aligned_in_smp;
534 u32 max_n_shift;
537 struct arm_smmu_queue {
538 struct arm_smmu_ll_queue llq;
539 int irq; /* Wired interrupt */
541 __le64 *base;
542 dma_addr_t base_dma;
543 u64 q_base;
545 size_t ent_dwords;
547 u32 __iomem *prod_reg;
548 u32 __iomem *cons_reg;
551 struct arm_smmu_queue_poll {
552 ktime_t timeout;
553 unsigned int delay;
554 unsigned int spin_cnt;
555 bool wfe;
558 struct arm_smmu_cmdq {
559 struct arm_smmu_queue q;
560 atomic_long_t *valid_map;
561 atomic_t owner_prod;
562 atomic_t lock;
565 struct arm_smmu_cmdq_batch {
566 u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
567 int num;
570 struct arm_smmu_evtq {
571 struct arm_smmu_queue q;
572 u32 max_stalls;
575 struct arm_smmu_priq {
576 struct arm_smmu_queue q;
579 /* High-level stream table and context descriptor structures */
580 struct arm_smmu_strtab_l1_desc {
581 u8 span;
583 __le64 *l2ptr;
584 dma_addr_t l2ptr_dma;
587 struct arm_smmu_ctx_desc {
588 u16 asid;
589 u64 ttbr;
590 u64 tcr;
591 u64 mair;
594 struct arm_smmu_l1_ctx_desc {
595 __le64 *l2ptr;
596 dma_addr_t l2ptr_dma;
599 struct arm_smmu_ctx_desc_cfg {
600 __le64 *cdtab;
601 dma_addr_t cdtab_dma;
602 struct arm_smmu_l1_ctx_desc *l1_desc;
603 unsigned int num_l1_ents;
606 struct arm_smmu_s1_cfg {
607 struct arm_smmu_ctx_desc_cfg cdcfg;
608 struct arm_smmu_ctx_desc cd;
609 u8 s1fmt;
610 u8 s1cdmax;
613 struct arm_smmu_s2_cfg {
614 u16 vmid;
615 u64 vttbr;
616 u64 vtcr;
619 struct arm_smmu_strtab_cfg {
620 __le64 *strtab;
621 dma_addr_t strtab_dma;
622 struct arm_smmu_strtab_l1_desc *l1_desc;
623 unsigned int num_l1_ents;
625 u64 strtab_base;
626 u32 strtab_base_cfg;
629 /* An SMMUv3 instance */
630 struct arm_smmu_device {
631 struct device *dev;
632 void __iomem *base;
633 void __iomem *page1;
635 #define ARM_SMMU_FEAT_2_LVL_STRTAB (1 << 0)
636 #define ARM_SMMU_FEAT_2_LVL_CDTAB (1 << 1)
637 #define ARM_SMMU_FEAT_TT_LE (1 << 2)
638 #define ARM_SMMU_FEAT_TT_BE (1 << 3)
639 #define ARM_SMMU_FEAT_PRI (1 << 4)
640 #define ARM_SMMU_FEAT_ATS (1 << 5)
641 #define ARM_SMMU_FEAT_SEV (1 << 6)
642 #define ARM_SMMU_FEAT_MSI (1 << 7)
643 #define ARM_SMMU_FEAT_COHERENCY (1 << 8)
644 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 9)
645 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
646 #define ARM_SMMU_FEAT_STALLS (1 << 11)
647 #define ARM_SMMU_FEAT_HYP (1 << 12)
648 #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
649 #define ARM_SMMU_FEAT_VAX (1 << 14)
650 #define ARM_SMMU_FEAT_RANGE_INV (1 << 15)
651 u32 features;
653 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
654 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
655 u32 options;
657 struct arm_smmu_cmdq cmdq;
658 struct arm_smmu_evtq evtq;
659 struct arm_smmu_priq priq;
661 int gerr_irq;
662 int combined_irq;
664 unsigned long ias; /* IPA */
665 unsigned long oas; /* PA */
666 unsigned long pgsize_bitmap;
668 #define ARM_SMMU_MAX_ASIDS (1 << 16)
669 unsigned int asid_bits;
670 DECLARE_BITMAP(asid_map, ARM_SMMU_MAX_ASIDS);
672 #define ARM_SMMU_MAX_VMIDS (1 << 16)
673 unsigned int vmid_bits;
674 DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
676 unsigned int ssid_bits;
677 unsigned int sid_bits;
679 struct arm_smmu_strtab_cfg strtab_cfg;
681 /* IOMMU core code handle */
682 struct iommu_device iommu;
685 /* SMMU private data for each master */
686 struct arm_smmu_master {
687 struct arm_smmu_device *smmu;
688 struct device *dev;
689 struct arm_smmu_domain *domain;
690 struct list_head domain_head;
691 u32 *sids;
692 unsigned int num_sids;
693 bool ats_enabled;
694 unsigned int ssid_bits;
697 /* SMMU private data for an IOMMU domain */
698 enum arm_smmu_domain_stage {
699 ARM_SMMU_DOMAIN_S1 = 0,
700 ARM_SMMU_DOMAIN_S2,
701 ARM_SMMU_DOMAIN_NESTED,
702 ARM_SMMU_DOMAIN_BYPASS,
705 struct arm_smmu_domain {
706 struct arm_smmu_device *smmu;
707 struct mutex init_mutex; /* Protects smmu pointer */
709 struct io_pgtable_ops *pgtbl_ops;
710 bool non_strict;
711 atomic_t nr_ats_masters;
713 enum arm_smmu_domain_stage stage;
714 union {
715 struct arm_smmu_s1_cfg s1_cfg;
716 struct arm_smmu_s2_cfg s2_cfg;
719 struct iommu_domain domain;
721 struct list_head devices;
722 spinlock_t devices_lock;
725 struct arm_smmu_option_prop {
726 u32 opt;
727 const char *prop;
730 static struct arm_smmu_option_prop arm_smmu_options[] = {
731 { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
732 { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
733 { 0, NULL},
736 static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
737 struct arm_smmu_device *smmu)
739 if (offset > SZ_64K)
740 return smmu->page1 + offset - SZ_64K;
742 return smmu->base + offset;
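/*
 * e.g. ARM_SMMU_EVTQ_PROD (0x100a8) comes back as page1 + 0xa8, i.e. the
 * register lives in the second 64kB page of the SMMU, while offsets below
 * 64kB (such as ARM_SMMU_CMDQ_PROD) are served straight from page 0.
 */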
745 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
747 return container_of(dom, struct arm_smmu_domain, domain);
750 static void parse_driver_options(struct arm_smmu_device *smmu)
752 int i = 0;
754 do {
755 if (of_property_read_bool(smmu->dev->of_node,
756 arm_smmu_options[i].prop)) {
757 smmu->options |= arm_smmu_options[i].opt;
758 dev_notice(smmu->dev, "option %s\n",
759 arm_smmu_options[i].prop);
761 } while (arm_smmu_options[++i].opt);
764 /* Low-level queue manipulation functions */
765 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
767 u32 space, prod, cons;
769 prod = Q_IDX(q, q->prod);
770 cons = Q_IDX(q, q->cons);
772 if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
773 space = (1 << q->max_n_shift) - (prod - cons);
774 else
775 space = cons - prod;
777 return space >= n;
780 static bool queue_full(struct arm_smmu_ll_queue *q)
782 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
783 Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
786 static bool queue_empty(struct arm_smmu_ll_queue *q)
788 return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
789 Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
792 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
794 return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
795 (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
796 ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
797 (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
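/*
 * Worked example of the wrap-bit encoding used by the helpers above
 * (illustrative only), for a tiny queue with max_n_shift = 2, i.e. four
 * entries: bits [1:0] are the index, bit 2 is the wrap bit and bit 31 is
 * the overflow flag.
 *
 *	prod = 0x0, cons = 0x0	same index, same wrap	-> queue_empty()
 *	prod = 0x4, cons = 0x0	same index, wrap differs-> queue_full()
 *	prod = 0x6, cons = 0x5	same wrap, one entry in use, so
 *				queue_has_space(q, 3) holds
 *
 * The producer and consumer only ever add to their own pointer, so the
 * wrap bit flips naturally every time the index overflows back to zero.
 */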
800 static void queue_sync_cons_out(struct arm_smmu_queue *q)
803 * Ensure that all CPU accesses (reads and writes) to the queue
804 * are complete before we update the cons pointer.
806 mb();
807 writel_relaxed(q->llq.cons, q->cons_reg);
810 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
812 u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
813 q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
816 static int queue_sync_prod_in(struct arm_smmu_queue *q)
818 int ret = 0;
819 u32 prod = readl_relaxed(q->prod_reg);
821 if (Q_OVF(prod) != Q_OVF(q->llq.prod))
822 ret = -EOVERFLOW;
824 q->llq.prod = prod;
825 return ret;
828 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
830 u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
831 return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
834 static void queue_poll_init(struct arm_smmu_device *smmu,
835 struct arm_smmu_queue_poll *qp)
837 qp->delay = 1;
838 qp->spin_cnt = 0;
839 qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
840 qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
843 static int queue_poll(struct arm_smmu_queue_poll *qp)
845 if (ktime_compare(ktime_get(), qp->timeout) > 0)
846 return -ETIMEDOUT;
848 if (qp->wfe) {
849 wfe();
850 } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
851 cpu_relax();
852 } else {
853 udelay(qp->delay);
854 qp->delay *= 2;
855 qp->spin_cnt = 0;
858 return 0;
861 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
863 int i;
865 for (i = 0; i < n_dwords; ++i)
866 *dst++ = cpu_to_le64(*src++);
869 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
871 int i;
873 for (i = 0; i < n_dwords; ++i)
874 *dst++ = le64_to_cpu(*src++);
877 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
879 if (queue_empty(&q->llq))
880 return -EAGAIN;
882 queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
883 queue_inc_cons(&q->llq);
884 queue_sync_cons_out(q);
885 return 0;
888 /* High-level queue accessors */
889 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
891 memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
892 cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
894 switch (ent->opcode) {
895 case CMDQ_OP_TLBI_EL2_ALL:
896 case CMDQ_OP_TLBI_NSNH_ALL:
897 break;
898 case CMDQ_OP_PREFETCH_CFG:
899 cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
900 cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
901 cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
902 break;
903 case CMDQ_OP_CFGI_CD:
904 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
905 /* Fallthrough */
906 case CMDQ_OP_CFGI_STE:
907 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
908 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
909 break;
910 case CMDQ_OP_CFGI_CD_ALL:
911 cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
912 break;
913 case CMDQ_OP_CFGI_ALL:
914 /* Cover the entire SID range */
915 cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
916 break;
917 case CMDQ_OP_TLBI_NH_VA:
918 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
919 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
920 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
921 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
922 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
923 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
924 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
925 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
926 break;
927 case CMDQ_OP_TLBI_S2_IPA:
928 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
929 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
930 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
931 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
932 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
933 cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
934 cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
935 break;
936 case CMDQ_OP_TLBI_NH_ASID:
937 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
938 /* Fallthrough */
939 case CMDQ_OP_TLBI_S12_VMALL:
940 cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
941 break;
942 case CMDQ_OP_ATC_INV:
943 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
944 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
945 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
946 cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
947 cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
948 cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
949 break;
950 case CMDQ_OP_PRI_RESP:
951 cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
952 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
953 cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
954 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
955 switch (ent->pri.resp) {
956 case PRI_RESP_DENY:
957 case PRI_RESP_FAIL:
958 case PRI_RESP_SUCC:
959 break;
960 default:
961 return -EINVAL;
963 cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
964 break;
965 case CMDQ_OP_CMD_SYNC:
966 if (ent->sync.msiaddr) {
967 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
968 cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
969 } else {
970 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
972 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
973 cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
974 break;
975 default:
976 return -ENOENT;
979 return 0;
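/*
 * Usage sketch (illustrative only, not a real call site; iova and granule
 * stand for the caller's address and invalidation granule): invalidating a
 * single stage-1 VA for ASID 5 amounts to filling in an arm_smmu_cmdq_ent
 * and letting arm_smmu_cmdq_build_cmd() pack it into the command dwords,
 * e.g. via the single-command helper further down:
 *
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode	= CMDQ_OP_TLBI_NH_VA,
 *		.tlbi	= {
 *			.asid	= 5,
 *			.addr	= iova & ~(granule - 1),
 *			.leaf	= true,
 *		},
 *	};
 *
 *	arm_smmu_cmdq_issue_cmd(smmu, &ent);
 *
 * Real invalidation paths batch many such commands and follow them with a
 * single CMD_SYNC rather than issuing them one by one.
 */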
982 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
983 u32 prod)
985 struct arm_smmu_queue *q = &smmu->cmdq.q;
986 struct arm_smmu_cmdq_ent ent = {
987 .opcode = CMDQ_OP_CMD_SYNC,
991 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
992 * payload, so the write will zero the entire command on that platform.
994 if (smmu->features & ARM_SMMU_FEAT_MSI &&
995 smmu->features & ARM_SMMU_FEAT_COHERENCY) {
996 ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
997 q->ent_dwords * 8;
1000 arm_smmu_cmdq_build_cmd(cmd, &ent);
1003 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
1005 static const char *cerror_str[] = {
1006 [CMDQ_ERR_CERROR_NONE_IDX] = "No error",
1007 [CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
1008 [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
1009 [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
1012 int i;
1013 u64 cmd[CMDQ_ENT_DWORDS];
1014 struct arm_smmu_queue *q = &smmu->cmdq.q;
1015 u32 cons = readl_relaxed(q->cons_reg);
1016 u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
1017 struct arm_smmu_cmdq_ent cmd_sync = {
1018 .opcode = CMDQ_OP_CMD_SYNC,
1021 dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
1022 idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
1024 switch (idx) {
1025 case CMDQ_ERR_CERROR_ABT_IDX:
1026 dev_err(smmu->dev, "retrying command fetch\n");
1027 case CMDQ_ERR_CERROR_NONE_IDX:
1028 return;
1029 case CMDQ_ERR_CERROR_ATC_INV_IDX:
1031 * ATC Invalidation Completion timeout. CONS is still pointing
1032 * at the CMD_SYNC. Attempt to complete other pending commands
1033 * by repeating the CMD_SYNC, though we might well end up back
1034 * here since the ATC invalidation may still be pending.
1036 return;
1037 case CMDQ_ERR_CERROR_ILL_IDX:
1038 /* Fallthrough */
1039 default:
1040 break;
1044 * We may have concurrent producers, so we need to be careful
1045 * not to touch any of the shadow cmdq state.
1047 queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
1048 dev_err(smmu->dev, "skipping command in error state:\n");
1049 for (i = 0; i < ARRAY_SIZE(cmd); ++i)
1050 dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
1052 /* Convert the erroneous command into a CMD_SYNC */
1053 if (arm_smmu_cmdq_build_cmd(cmd, &cmd_sync)) {
1054 dev_err(smmu->dev, "failed to convert to CMD_SYNC\n");
1055 return;
1058 queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
1062 * Command queue locking.
1063 * This is a form of bastardised rwlock with the following major changes:
1065 * - The only LOCK routines are exclusive_trylock() and shared_lock().
1066 * Neither have barrier semantics, and instead provide only a control
1067 * dependency.
1069 * - The UNLOCK routines are supplemented with shared_tryunlock(), which
1070 * fails if the caller appears to be the last lock holder (yes, this is
1071 * racy). All successful UNLOCK routines have RELEASE semantics.
1073 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
1075 int val;
1078 * We can try to avoid the cmpxchg() loop by simply incrementing the
1079 * lock counter. When held in exclusive state, the lock counter is set
1080 * to INT_MIN so these increments won't hurt as the value will remain
1081 * negative.
1083 if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
1084 return;
1086 do {
1087 val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
1088 } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
1091 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
1093 (void)atomic_dec_return_release(&cmdq->lock);
1096 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
1098 if (atomic_read(&cmdq->lock) == 1)
1099 return false;
1101 arm_smmu_cmdq_shared_unlock(cmdq);
1102 return true;
1105 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
1106 ({ \
1107 bool __ret; \
1108 local_irq_save(flags); \
1109 __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
1110 if (!__ret) \
1111 local_irq_restore(flags); \
1112 __ret; \
1115 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
1116 ({ \
1117 atomic_set_release(&cmdq->lock, 0); \
1118 local_irq_restore(flags); \
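/*
 * Sketch of how the two halves above are meant to be paired (illustrative
 * only, mirroring the real users further down):
 *
 *	Refreshing the software copy of cons from the hardware register:
 *
 *		if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
 *			WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
 *			arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
 *		}
 *
 *	Tracking an outstanding CMD_SYNC:
 *
 *		arm_smmu_cmdq_shared_lock(cmdq);
 *		...wait for the sync to be consumed...
 *		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
 *			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
 *			arm_smmu_cmdq_shared_unlock(cmdq);
 *		}
 */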
1123 * Command queue insertion.
1124 * This is made fiddly by our attempts to achieve some sort of scalability
1125 * since there is one queue shared amongst all of the CPUs in the system. If
1126 * you like mixed-size concurrency, dependency ordering and relaxed atomics,
1127 * then you'll *love* this monstrosity.
1129 * The basic idea is to split the queue up into ranges of commands that are
1130 * owned by a given CPU; the owner may not have written all of the commands
1131 * itself, but is responsible for advancing the hardware prod pointer when
1132 * the time comes. The algorithm is roughly:
1134 * 1. Allocate some space in the queue. At this point we also discover
1135 * whether the head of the queue is currently owned by another CPU,
1136 * or whether we are the owner.
1138 * 2. Write our commands into our allocated slots in the queue.
1140 * 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
1142 * 4. If we are an owner:
1143 * a. Wait for the previous owner to finish.
1144 * b. Mark the queue head as unowned, which tells us the range
1145 * that we are responsible for publishing.
1146 * c. Wait for all commands in our owned range to become valid.
1147 * d. Advance the hardware prod pointer.
1148 * e. Tell the next owner we've finished.
1150 * 5. If we are inserting a CMD_SYNC (we may or may not have been an
1151 * owner), then we need to stick around until it has completed:
1152 * a. If we have MSIs, the SMMU can write back into the CMD_SYNC
1153 * to clear the first 4 bytes.
1154 * b. Otherwise, we spin waiting for the hardware cons pointer to
1155 * advance past our command.
1157 * The devil is in the details, particularly the use of locking for handling
1158 * SYNC completion and freeing up space in the queue before we think that it is
1159 * full.
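/*
 * Worked example of step (1) above (illustrative only): CPUs A and B race
 * to insert two and three commands respectively on an idle queue:
 *
 *	A: cmpxchg prod 0x0 -> 0x2 | OWNED	the old value had no OWNED
 *						flag, so A becomes the owner
 *	B: cmpxchg prod 0x2 | OWNED -> 0x5 | OWNED
 *						the old value was already
 *						OWNED, so B only contributes
 *						its commands
 *
 * When A later clears the flag with atomic_fetch_andnot() it reads back
 * 0x5, so it polls entries 0-4 of the valid_map and publishes prod = 0x5
 * to the hardware on behalf of both CPUs.
 */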
1161 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
1162 u32 sprod, u32 eprod, bool set)
1164 u32 swidx, sbidx, ewidx, ebidx;
1165 struct arm_smmu_ll_queue llq = {
1166 .max_n_shift = cmdq->q.llq.max_n_shift,
1167 .prod = sprod,
1170 ewidx = BIT_WORD(Q_IDX(&llq, eprod));
1171 ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
1173 while (llq.prod != eprod) {
1174 unsigned long mask;
1175 atomic_long_t *ptr;
1176 u32 limit = BITS_PER_LONG;
1178 swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
1179 sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
1181 ptr = &cmdq->valid_map[swidx];
1183 if ((swidx == ewidx) && (sbidx < ebidx))
1184 limit = ebidx;
1186 mask = GENMASK(limit - 1, sbidx);
1189 * The valid bit is the inverse of the wrap bit. This means
1190 * that a zero-initialised queue is invalid and, after marking
1191 * all entries as valid, they become invalid again when we
1192 * wrap.
1194 if (set) {
1195 atomic_long_xor(mask, ptr);
1196 } else { /* Poll */
1197 unsigned long valid;
1199 valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
1200 atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
1203 llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
1207 /* Mark all entries in the range [sprod, eprod) as valid */
1208 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
1209 u32 sprod, u32 eprod)
1211 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
1214 /* Wait for all entries in the range [sprod, eprod) to become valid */
1215 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
1216 u32 sprod, u32 eprod)
1218 __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
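/*
 * Concrete example of the encoding polled above (illustrative): an entry
 * written while the producer's wrap bit is 0 is valid when its bitmap bit
 * is 1, and on the next lap (wrap bit 1) it is valid when the bit is 0.
 * Because marking a slot is an XOR, each lap's write automatically flips
 * the bit into that lap's "valid" state, so the bitmap never needs to be
 * re-zeroed.
 */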
1221 /* Wait for the command queue to become non-full */
1222 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
1223 struct arm_smmu_ll_queue *llq)
1225 unsigned long flags;
1226 struct arm_smmu_queue_poll qp;
1227 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1228 int ret = 0;
1231 * Try to update our copy of cons by grabbing exclusive cmdq access. If
1232 * that fails, spin until somebody else updates it for us.
1234 if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
1235 WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
1236 arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
1237 llq->val = READ_ONCE(cmdq->q.llq.val);
1238 return 0;
1241 queue_poll_init(smmu, &qp);
1242 do {
1243 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1244 if (!queue_full(llq))
1245 break;
1247 ret = queue_poll(&qp);
1248 } while (!ret);
1250 return ret;
1254 * Wait until the SMMU signals a CMD_SYNC completion MSI.
1255 * Must be called with the cmdq lock held in some capacity.
1257 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
1258 struct arm_smmu_ll_queue *llq)
1260 int ret = 0;
1261 struct arm_smmu_queue_poll qp;
1262 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1263 u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
1265 queue_poll_init(smmu, &qp);
1268 * The MSI won't generate an event, since it's being written back
1269 * into the command queue.
1271 qp.wfe = false;
1272 smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
1273 llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
1274 return ret;
1278 * Wait until the SMMU cons index passes llq->prod.
1279 * Must be called with the cmdq lock held in some capacity.
1281 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
1282 struct arm_smmu_ll_queue *llq)
1284 struct arm_smmu_queue_poll qp;
1285 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1286 u32 prod = llq->prod;
1287 int ret = 0;
1289 queue_poll_init(smmu, &qp);
1290 llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
1291 do {
1292 if (queue_consumed(llq, prod))
1293 break;
1295 ret = queue_poll(&qp);
1298 * This needs to be a readl() so that our subsequent call
1299 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
1301 * Specifically, we need to ensure that we observe all
1302 * shared_lock()s by other CMD_SYNCs that share our owner,
1303 * so that a failing call to tryunlock() means that we're
1304 * the last one out and therefore we can safely advance
1305 * cmdq->q.llq.cons. Roughly speaking:
1307 * CPU 0 CPU1 CPU2 (us)
1309 * if (sync)
1310 * shared_lock();
1312 * dma_wmb();
1313 * set_valid_map();
1315 * if (owner) {
1316 * poll_valid_map();
1317 * <control dependency>
1318 * writel(prod_reg);
1320 * readl(cons_reg);
1321 * tryunlock();
1323 * Requires us to see CPU 0's shared_lock() acquisition.
1325 llq->cons = readl(cmdq->q.cons_reg);
1326 } while (!ret);
1328 return ret;
1331 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
1332 struct arm_smmu_ll_queue *llq)
1334 if (smmu->features & ARM_SMMU_FEAT_MSI &&
1335 smmu->features & ARM_SMMU_FEAT_COHERENCY)
1336 return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
1338 return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
1341 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
1342 u32 prod, int n)
1344 int i;
1345 struct arm_smmu_ll_queue llq = {
1346 .max_n_shift = cmdq->q.llq.max_n_shift,
1347 .prod = prod,
1350 for (i = 0; i < n; ++i) {
1351 u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
1353 prod = queue_inc_prod_n(&llq, i);
1354 queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
1359 * This is the actual insertion function, and provides the following
1360 * ordering guarantees to callers:
1362 * - There is a dma_wmb() before publishing any commands to the queue.
1363 * This can be relied upon to order prior writes to data structures
1364 * in memory (such as a CD or an STE) before the command.
1366 * - On completion of a CMD_SYNC, there is a control dependency.
1367 * This can be relied upon to order subsequent writes to memory (e.g.
1368 * freeing an IOVA) after completion of the CMD_SYNC.
1370 * - Command insertion is totally ordered, so if two CPUs each race to
1371 * insert their own list of commands then all of the commands from one
1372 * CPU will appear before any of the commands from the other CPU.
1374 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1375 u64 *cmds, int n, bool sync)
1377 u64 cmd_sync[CMDQ_ENT_DWORDS];
1378 u32 prod;
1379 unsigned long flags;
1380 bool owner;
1381 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1382 struct arm_smmu_ll_queue llq = {
1383 .max_n_shift = cmdq->q.llq.max_n_shift,
1384 }, head = llq;
1385 int ret = 0;
1387 /* 1. Allocate some space in the queue */
1388 local_irq_save(flags);
1389 llq.val = READ_ONCE(cmdq->q.llq.val);
1390 do {
1391 u64 old;
1393 while (!queue_has_space(&llq, n + sync)) {
1394 local_irq_restore(flags);
1395 if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
1396 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1397 local_irq_save(flags);
1400 head.cons = llq.cons;
1401 head.prod = queue_inc_prod_n(&llq, n + sync) |
1402 CMDQ_PROD_OWNED_FLAG;
1404 old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
1405 if (old == llq.val)
1406 break;
1408 llq.val = old;
1409 } while (1);
1410 owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
1411 head.prod &= ~CMDQ_PROD_OWNED_FLAG;
1412 llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
1415 * 2. Write our commands into the queue
1416 * Dependency ordering from the cmpxchg() loop above.
1418 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1419 if (sync) {
1420 prod = queue_inc_prod_n(&llq, n);
1421 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1422 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1425 * In order to determine completion of our CMD_SYNC, we must
1426 * ensure that the queue can't wrap twice without us noticing.
1427 * We achieve that by taking the cmdq lock as shared before
1428 * marking our slot as valid.
1430 arm_smmu_cmdq_shared_lock(cmdq);
1433 /* 3. Mark our slots as valid, ensuring commands are visible first */
1434 dma_wmb();
1435 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1437 /* 4. If we are the owner, take control of the SMMU hardware */
1438 if (owner) {
1439 /* a. Wait for previous owner to finish */
1440 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1442 /* b. Stop gathering work by clearing the owned flag */
1443 prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
1444 &cmdq->q.llq.atomic.prod);
1445 prod &= ~CMDQ_PROD_OWNED_FLAG;
1448 * c. Wait for any gathered work to be written to the queue.
1449 * Note that we read our own entries so that we have the control
1450 * dependency required by (d).
1452 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1455 * d. Advance the hardware prod pointer
1456 * Control dependency ordering from the entries becoming valid.
1458 writel_relaxed(prod, cmdq->q.prod_reg);
1461 * e. Tell the next owner we're done
1462 * Make sure we've updated the hardware first, so that we don't
1463 * race to update prod and potentially move it backwards.
1465 atomic_set_release(&cmdq->owner_prod, prod);
1468 /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
1469 if (sync) {
1470 llq.prod = queue_inc_prod_n(&llq, n);
1471 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1472 if (ret) {
1473 dev_err_ratelimited(smmu->dev,
1474 "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1475 llq.prod,
1476 readl_relaxed(cmdq->q.prod_reg),
1477 readl_relaxed(cmdq->q.cons_reg));
1481 * Try to unlock the cmdq lock. This will fail if we're the last
1482 * reader, in which case we can safely update cmdq->q.llq.cons
1484 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1485 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1486 arm_smmu_cmdq_shared_unlock(cmdq);
1490 local_irq_restore(flags);
1491 return ret;
1494 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
1495 struct arm_smmu_cmdq_ent *ent)
1497 u64 cmd[CMDQ_ENT_DWORDS];
1499 if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
1500 dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
1501 ent->opcode);
1502 return -EINVAL;
1505 return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
1508 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
1510 return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
1513 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
1514 struct arm_smmu_cmdq_batch *cmds,
1515 struct arm_smmu_cmdq_ent *cmd)
1517 if (cmds->num == CMDQ_BATCH_ENTRIES) {
1518 arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
1519 cmds->num = 0;
1521 arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
1522 cmds->num++;
1525 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
1526 struct arm_smmu_cmdq_batch *cmds)
1528 return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
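/*
 * Typical batching pattern (sketch only; arm_smmu_sync_cd() below is a
 * real user):
 *
 *	struct arm_smmu_cmdq_batch cmds = {};
 *	struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, ... };
 *
 *	for each StreamID of interest:
 *		cmd.cfgi.sid = sid;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 *
 * batch_add() flushes automatically once CMDQ_BATCH_ENTRIES commands have
 * accumulated, and batch_submit() appends the final CMD_SYNC and waits for
 * it to complete.
 */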
1531 /* Context descriptor manipulation functions */
1532 static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
1533 int ssid, bool leaf)
1535 size_t i;
1536 unsigned long flags;
1537 struct arm_smmu_master *master;
1538 struct arm_smmu_cmdq_batch cmds = {};
1539 struct arm_smmu_device *smmu = smmu_domain->smmu;
1540 struct arm_smmu_cmdq_ent cmd = {
1541 .opcode = CMDQ_OP_CFGI_CD,
1542 .cfgi = {
1543 .ssid = ssid,
1544 .leaf = leaf,
1548 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1549 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1550 for (i = 0; i < master->num_sids; i++) {
1551 cmd.cfgi.sid = master->sids[i];
1552 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
1555 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1557 arm_smmu_cmdq_batch_submit(smmu, &cmds);
1560 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
1561 struct arm_smmu_l1_ctx_desc *l1_desc)
1563 size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1565 l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1566 &l1_desc->l2ptr_dma, GFP_KERNEL);
1567 if (!l1_desc->l2ptr) {
1568 dev_warn(smmu->dev,
1569 "failed to allocate context descriptor table\n");
1570 return -ENOMEM;
1572 return 0;
1575 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1576 struct arm_smmu_l1_ctx_desc *l1_desc)
1578 u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1579 CTXDESC_L1_DESC_V;
1581 /* See comment in arm_smmu_write_ctx_desc() */
1582 WRITE_ONCE(*dst, cpu_to_le64(val));
1585 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
1586 u32 ssid)
1588 __le64 *l1ptr;
1589 unsigned int idx;
1590 struct arm_smmu_l1_ctx_desc *l1_desc;
1591 struct arm_smmu_device *smmu = smmu_domain->smmu;
1592 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1594 if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1595 return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
1597 idx = ssid >> CTXDESC_SPLIT;
1598 l1_desc = &cdcfg->l1_desc[idx];
1599 if (!l1_desc->l2ptr) {
1600 if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1601 return NULL;
1603 l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1604 arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1605 /* An invalid L1CD can be cached */
1606 arm_smmu_sync_cd(smmu_domain, ssid, false);
1608 idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1609 return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1612 static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
1613 int ssid, struct arm_smmu_ctx_desc *cd)
1616 * This function handles the following cases:
1618 * (1) Install primary CD, for normal DMA traffic (SSID = 0).
1619 * (2) Install a secondary CD, for SID+SSID traffic.
1620 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1621 * CD, then invalidate the old entry and mappings.
1622 * (4) Remove a secondary CD.
1624 u64 val;
1625 bool cd_live;
1626 __le64 *cdptr;
1627 struct arm_smmu_device *smmu = smmu_domain->smmu;
1629 if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
1630 return -E2BIG;
1632 cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
1633 if (!cdptr)
1634 return -ENOMEM;
1636 val = le64_to_cpu(cdptr[0]);
1637 cd_live = !!(val & CTXDESC_CD_0_V);
1639 if (!cd) { /* (4) */
1640 val = 0;
1641 } else if (cd_live) { /* (3) */
1642 val &= ~CTXDESC_CD_0_ASID;
1643 val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1645 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1646 * this substream's traffic
1648 } else { /* (1) and (2) */
1649 cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1650 cdptr[2] = 0;
1651 cdptr[3] = cpu_to_le64(cd->mair);
1654 * STE is live, and the SMMU might read dwords of this CD in any
1655 * order. Ensure that it observes valid values before reading
1656 * V=1.
1658 arm_smmu_sync_cd(smmu_domain, ssid, true);
1660 val = cd->tcr |
1661 #ifdef __BIG_ENDIAN
1662 CTXDESC_CD_0_ENDI |
1663 #endif
1664 CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
1665 CTXDESC_CD_0_AA64 |
1666 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1667 CTXDESC_CD_0_V;
1669 /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
1670 if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
1671 val |= CTXDESC_CD_0_S;
1675 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1676 * "Configuration structures and configuration invalidation completion"
1678 * The size of single-copy atomic reads made by the SMMU is
1679 * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1680 * field within an aligned 64-bit span of a structure can be altered
1681 * without first making the structure invalid.
1683 WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1684 arm_smmu_sync_cd(smmu_domain, ssid, true);
1685 return 0;
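/*
 * Usage sketch for the cases handled above (illustrative only, roughly how
 * the rest of the driver exercises them):
 *
 *	arm_smmu_write_ctx_desc(smmu_domain, 0, &smmu_domain->s1_cfg.cd);
 *		case (1): install the primary CD for SSID 0
 *	arm_smmu_write_ctx_desc(smmu_domain, ssid, cd);
 *		case (2) if the slot was invalid, case (3) if a live CD's
 *		ASID is being changed
 *	arm_smmu_write_ctx_desc(smmu_domain, ssid, NULL);
 *		case (4): tear down a secondary CD
 */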
1688 static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
1690 int ret;
1691 size_t l1size;
1692 size_t max_contexts;
1693 struct arm_smmu_device *smmu = smmu_domain->smmu;
1694 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
1695 struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
1697 max_contexts = 1 << cfg->s1cdmax;
1699 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1700 max_contexts <= CTXDESC_L2_ENTRIES) {
1701 cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1702 cdcfg->num_l1_ents = max_contexts;
1704 l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1705 } else {
1706 cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1707 cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
1708 CTXDESC_L2_ENTRIES);
1710 cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
1711 sizeof(*cdcfg->l1_desc),
1712 GFP_KERNEL);
1713 if (!cdcfg->l1_desc)
1714 return -ENOMEM;
1716 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1719 cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
1720 GFP_KERNEL);
1721 if (!cdcfg->cdtab) {
1722 dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1723 ret = -ENOMEM;
1724 goto err_free_l1;
1727 return 0;
1729 err_free_l1:
1730 if (cdcfg->l1_desc) {
1731 devm_kfree(smmu->dev, cdcfg->l1_desc);
1732 cdcfg->l1_desc = NULL;
1734 return ret;
1737 static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
1739 int i;
1740 size_t size, l1size;
1741 struct arm_smmu_device *smmu = smmu_domain->smmu;
1742 struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
1744 if (cdcfg->l1_desc) {
1745 size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1747 for (i = 0; i < cdcfg->num_l1_ents; i++) {
1748 if (!cdcfg->l1_desc[i].l2ptr)
1749 continue;
1751 dmam_free_coherent(smmu->dev, size,
1752 cdcfg->l1_desc[i].l2ptr,
1753 cdcfg->l1_desc[i].l2ptr_dma);
1755 devm_kfree(smmu->dev, cdcfg->l1_desc);
1756 cdcfg->l1_desc = NULL;
1758 l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1759 } else {
1760 l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1763 dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
1764 cdcfg->cdtab_dma = 0;
1765 cdcfg->cdtab = NULL;
1768 /* Stream table manipulation functions */
1769 static void
1770 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1772 u64 val = 0;
1774 val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1775 val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1777 /* See comment in arm_smmu_write_ctx_desc() */
1778 WRITE_ONCE(*dst, cpu_to_le64(val));
1781 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1783 struct arm_smmu_cmdq_ent cmd = {
1784 .opcode = CMDQ_OP_CFGI_STE,
1785 .cfgi = {
1786 .sid = sid,
1787 .leaf = true,
1791 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1792 arm_smmu_cmdq_issue_sync(smmu);
1795 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1796 __le64 *dst)
1799 * This is hideously complicated, but we only really care about
1800 * three cases at the moment:
1802 * 1. Invalid (all zero) -> bypass/fault (init)
1803 * 2. Bypass/fault -> translation/bypass (attach)
1804 * 3. Translation/bypass -> bypass/fault (detach)
1806 * Given that we can't update the STE atomically and the SMMU
1807 * doesn't read the thing in a defined order, that leaves us
1808 * with the following maintenance requirements:
1810 * 1. Update Config, return (init time STEs aren't live)
1811 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1812 * 3. Update Config, sync
1814 u64 val = le64_to_cpu(dst[0]);
1815 bool ste_live = false;
1816 struct arm_smmu_device *smmu = NULL;
1817 struct arm_smmu_s1_cfg *s1_cfg = NULL;
1818 struct arm_smmu_s2_cfg *s2_cfg = NULL;
1819 struct arm_smmu_domain *smmu_domain = NULL;
1820 struct arm_smmu_cmdq_ent prefetch_cmd = {
1821 .opcode = CMDQ_OP_PREFETCH_CFG,
1822 .prefetch = {
1823 .sid = sid,
1827 if (master) {
1828 smmu_domain = master->domain;
1829 smmu = master->smmu;
1832 if (smmu_domain) {
1833 switch (smmu_domain->stage) {
1834 case ARM_SMMU_DOMAIN_S1:
1835 s1_cfg = &smmu_domain->s1_cfg;
1836 break;
1837 case ARM_SMMU_DOMAIN_S2:
1838 case ARM_SMMU_DOMAIN_NESTED:
1839 s2_cfg = &smmu_domain->s2_cfg;
1840 break;
1841 default:
1842 break;
1846 if (val & STRTAB_STE_0_V) {
1847 switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1848 case STRTAB_STE_0_CFG_BYPASS:
1849 break;
1850 case STRTAB_STE_0_CFG_S1_TRANS:
1851 case STRTAB_STE_0_CFG_S2_TRANS:
1852 ste_live = true;
1853 break;
1854 case STRTAB_STE_0_CFG_ABORT:
1855 BUG_ON(!disable_bypass);
1856 break;
1857 default:
1858 BUG(); /* STE corruption */
1862 /* Nuke the existing STE_0 value, as we're going to rewrite it */
1863 val = STRTAB_STE_0_V;
1865 /* Bypass/fault */
1866 if (!smmu_domain || !(s1_cfg || s2_cfg)) {
1867 if (!smmu_domain && disable_bypass)
1868 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1869 else
1870 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1872 dst[0] = cpu_to_le64(val);
1873 dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1874 STRTAB_STE_1_SHCFG_INCOMING));
1875 dst[2] = 0; /* Nuke the VMID */
1877 * The SMMU can perform negative caching, so we must sync
1878 * the STE regardless of whether the old value was live.
1880 if (smmu)
1881 arm_smmu_sync_ste_for_sid(smmu, sid);
1882 return;
1885 if (s1_cfg) {
1886 BUG_ON(ste_live);
1887 dst[1] = cpu_to_le64(
1888 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1889 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1890 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1891 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1892 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
1894 if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1895 !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1896 dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1898 val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1899 FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1900 FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
1901 FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
1904 if (s2_cfg) {
1905 BUG_ON(ste_live);
1906 dst[2] = cpu_to_le64(
1907 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1908 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1909 #ifdef __BIG_ENDIAN
1910 STRTAB_STE_2_S2ENDI |
1911 #endif
1912 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1913 STRTAB_STE_2_S2R);
1915 dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1917 val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1920 if (master->ats_enabled)
1921 dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1922 STRTAB_STE_1_EATS_TRANS));
1924 arm_smmu_sync_ste_for_sid(smmu, sid);
1925 /* See comment in arm_smmu_write_ctx_desc() */
1926 WRITE_ONCE(dst[0], cpu_to_le64(val));
1927 arm_smmu_sync_ste_for_sid(smmu, sid);
1929 /* It's likely that we'll want to use the new STE soon */
1930 if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1931 arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1934 static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
1936 unsigned int i;
1938 for (i = 0; i < nent; ++i) {
1939 arm_smmu_write_strtab_ent(NULL, -1, strtab);
1940 strtab += STRTAB_STE_DWORDS;
1944 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1946 size_t size;
1947 void *strtab;
1948 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1949 struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1951 if (desc->l2ptr)
1952 return 0;
1954 size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1955 strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1957 desc->span = STRTAB_SPLIT + 1;
1958 desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1959 GFP_KERNEL);
1960 if (!desc->l2ptr) {
1961 dev_err(smmu->dev,
1962 "failed to allocate l2 stream table for SID %u\n",
1963 sid);
1964 return -ENOMEM;
1967 arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
1968 arm_smmu_write_strtab_l1_desc(strtab, desc);
1969 return 0;
1972 /* IRQ and event handlers */
1973 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1975 int i;
1976 struct arm_smmu_device *smmu = dev;
1977 struct arm_smmu_queue *q = &smmu->evtq.q;
1978 struct arm_smmu_ll_queue *llq = &q->llq;
1979 u64 evt[EVTQ_ENT_DWORDS];
1981 do {
1982 while (!queue_remove_raw(q, evt)) {
1983 u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1985 dev_info(smmu->dev, "event 0x%02x received:\n", id);
1986 for (i = 0; i < ARRAY_SIZE(evt); ++i)
1987 dev_info(smmu->dev, "\t0x%016llx\n",
1988 (unsigned long long)evt[i]);
1993 * Not much we can do on overflow, so scream and pretend we're
1994 * trying harder.
1996 if (queue_sync_prod_in(q) == -EOVERFLOW)
1997 dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1998 } while (!queue_empty(llq));
2000 /* Sync our overflow flag, as we believe we're up to speed */
2001 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2002 Q_IDX(llq, llq->cons);
2003 return IRQ_HANDLED;
2006 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
2008 u32 sid, ssid;
2009 u16 grpid;
2010 bool ssv, last;
2012 sid = FIELD_GET(PRIQ_0_SID, evt[0]);
2013 ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
2014 ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
2015 last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
2016 grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
2018 dev_info(smmu->dev, "unexpected PRI request received:\n");
2019 dev_info(smmu->dev,
2020 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
2021 sid, ssid, grpid, last ? "L" : "",
2022 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
2023 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
2024 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
2025 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
2026 evt[1] & PRIQ_1_ADDR_MASK);
2028 if (last) {
2029 struct arm_smmu_cmdq_ent cmd = {
2030 .opcode = CMDQ_OP_PRI_RESP,
2031 .substream_valid = ssv,
2032 .pri = {
2033 .sid = sid,
2034 .ssid = ssid,
2035 .grpid = grpid,
2036 .resp = PRI_RESP_DENY,
2040 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2044 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
2046 struct arm_smmu_device *smmu = dev;
2047 struct arm_smmu_queue *q = &smmu->priq.q;
2048 struct arm_smmu_ll_queue *llq = &q->llq;
2049 u64 evt[PRIQ_ENT_DWORDS];
2051 do {
2052 while (!queue_remove_raw(q, evt))
2053 arm_smmu_handle_ppr(smmu, evt);
2055 if (queue_sync_prod_in(q) == -EOVERFLOW)
2056 dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
2057 } while (!queue_empty(llq));
2059 /* Sync our overflow flag, as we believe we're up to speed */
2060 llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
2061 Q_IDX(llq, llq->cons);
2062 queue_sync_cons_out(q);
2063 return IRQ_HANDLED;
2066 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
2068 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
2070 u32 gerror, gerrorn, active;
2071 struct arm_smmu_device *smmu = dev;
2073 gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
2074 gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
2076 active = gerror ^ gerrorn;
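/*
 * GERROR uses toggle semantics: the SMMU flips a GERROR bit when the
 * corresponding error becomes active, and software acknowledges it by
 * writing the observed GERROR value back to GERRORN (done at the end of
 * this handler). A bit that differs between the two registers therefore
 * marks an error we have not yet acknowledged.
 */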
2077 if (!(active & GERROR_ERR_MASK))
2078 return IRQ_NONE; /* No errors pending */
2080 dev_warn(smmu->dev,
2081 "unexpected global error reported (0x%08x), this could be serious\n",
2082 active);
2084 if (active & GERROR_SFM_ERR) {
2085 dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
2086 arm_smmu_device_disable(smmu);
2089 if (active & GERROR_MSI_GERROR_ABT_ERR)
2090 dev_warn(smmu->dev, "GERROR MSI write aborted\n");
2092 if (active & GERROR_MSI_PRIQ_ABT_ERR)
2093 dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
2095 if (active & GERROR_MSI_EVTQ_ABT_ERR)
2096 dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
2098 if (active & GERROR_MSI_CMDQ_ABT_ERR)
2099 dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
2101 if (active & GERROR_PRIQ_ABT_ERR)
2102 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
2104 if (active & GERROR_EVTQ_ABT_ERR)
2105 dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
2107 if (active & GERROR_CMDQ_ERR)
2108 arm_smmu_cmdq_skip_err(smmu);
2110 writel(gerror, smmu->base + ARM_SMMU_GERRORN);
2111 return IRQ_HANDLED;
2114 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
2116 struct arm_smmu_device *smmu = dev;
2118 arm_smmu_evtq_thread(irq, dev);
2119 if (smmu->features & ARM_SMMU_FEAT_PRI)
2120 arm_smmu_priq_thread(irq, dev);
2122 return IRQ_HANDLED;
2125 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
2127 arm_smmu_gerror_handler(irq, dev);
2128 return IRQ_WAKE_THREAD;
2131 static void
2132 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
2133 struct arm_smmu_cmdq_ent *cmd)
2135 size_t log2_span;
2136 size_t span_mask;
2137 /* ATC invalidates are always on 4096-bytes pages */
2138 size_t inval_grain_shift = 12;
2139 unsigned long page_start, page_end;
2141 *cmd = (struct arm_smmu_cmdq_ent) {
2142 .opcode = CMDQ_OP_ATC_INV,
2143 .substream_valid = !!ssid,
2144 .atc.ssid = ssid,
2147 if (!size) {
2148 cmd->atc.size = ATC_INV_SIZE_ALL;
2149 return;
2152 page_start = iova >> inval_grain_shift;
2153 page_end = (iova + size - 1) >> inval_grain_shift;
2156 * In an ATS Invalidate Request, the address must be aligned on the
2157 * range size, which must be a power of two number of page sizes. We
2158 * thus have to choose between grossly over-invalidating the region, or
2159 * splitting the invalidation into multiple commands. For simplicity
2160 * we'll go with the first solution, but should refine it in the future
2161 * if multiple commands are shown to be more efficient.
2163 * Find the smallest power of two that covers the range. The most
2164 * significant differing bit between the start and end addresses,
2165 * fls(start ^ end), indicates the required span. For example:
2167 * We want to invalidate pages [8; 11]. This is already the ideal range:
2168 * x = 0b1000 ^ 0b1011 = 0b11
2169 * span = 1 << fls(x) = 4
2171 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
2172 * x = 0b0111 ^ 0b1010 = 0b1101
2173 * span = 1 << fls(x) = 16
2175 log2_span = fls_long(page_start ^ page_end);
2176 span_mask = (1ULL << log2_span) - 1;
2178 page_start &= ~span_mask;
2180 cmd->atc.addr = page_start << inval_grain_shift;
2181 cmd->atc.size = log2_span;
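/*
 * Continuing the [7; 10] example in the comment above: log2_span = 4,
 * so page_start is rounded down to 0 and the resulting command reads
 * addr = 0x0, size = 4, i.e. a single invalidation covering 16 pages
 * (64KiB at the 4KiB ATC granule).
 */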
2184 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
2186 int i;
2187 struct arm_smmu_cmdq_ent cmd;
2189 arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
2191 for (i = 0; i < master->num_sids; i++) {
2192 cmd.atc.sid = master->sids[i];
2193 arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
2196 return arm_smmu_cmdq_issue_sync(master->smmu);
2199 static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
2200 int ssid, unsigned long iova, size_t size)
2202 int i;
2203 unsigned long flags;
2204 struct arm_smmu_cmdq_ent cmd;
2205 struct arm_smmu_master *master;
2206 struct arm_smmu_cmdq_batch cmds = {};
2208 if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
2209 return 0;
2212 * Ensure that we've completed prior invalidation of the main TLBs
2213 * before we read 'nr_ats_masters' in case of a concurrent call to
2214 * arm_smmu_enable_ats():
2216 * // unmap() // arm_smmu_enable_ats()
2217 * TLBI+SYNC atomic_inc(&nr_ats_masters);
2218 * smp_mb(); [...]
2219 * atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
2221 * Ensures that we always see the incremented 'nr_ats_masters' count if
2222 * ATS was enabled at the PCI device before completion of the TLBI.
2224 smp_mb();
2225 if (!atomic_read(&smmu_domain->nr_ats_masters))
2226 return 0;
2228 arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
2230 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2231 list_for_each_entry(master, &smmu_domain->devices, domain_head) {
2232 if (!master->ats_enabled)
2233 continue;
2235 for (i = 0; i < master->num_sids; i++) {
2236 cmd.atc.sid = master->sids[i];
2237 arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
2240 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2242 return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
2245 /* IO_PGTABLE API */
2246 static void arm_smmu_tlb_inv_context(void *cookie)
2248 struct arm_smmu_domain *smmu_domain = cookie;
2249 struct arm_smmu_device *smmu = smmu_domain->smmu;
2250 struct arm_smmu_cmdq_ent cmd;
2252 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2253 cmd.opcode = CMDQ_OP_TLBI_NH_ASID;
2254 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
2255 cmd.tlbi.vmid = 0;
2256 } else {
2257 cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
2258 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2262 * NOTE: when io-pgtable is in non-strict mode, we may get here with
2263 * PTEs previously cleared by unmaps on the current CPU not yet visible
2264 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
2265 * insertion to guarantee those are observed before the TLBI. Do be
2266 * careful, 007.
2268 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
2269 arm_smmu_cmdq_issue_sync(smmu);
2270 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2273 static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
2274 size_t granule, bool leaf,
2275 struct arm_smmu_domain *smmu_domain)
2277 struct arm_smmu_device *smmu = smmu_domain->smmu;
2278 unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
2279 size_t inv_range = granule;
2280 struct arm_smmu_cmdq_batch cmds = {};
2281 struct arm_smmu_cmdq_ent cmd = {
2282 .tlbi = {
2283 .leaf = leaf,
2287 if (!size)
2288 return;
2290 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2291 cmd.opcode = CMDQ_OP_TLBI_NH_VA;
2292 cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
2293 } else {
2294 cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
2295 cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
2298 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2299 /* Get the leaf page size */
2300 tg = __ffs(smmu_domain->domain.pgsize_bitmap);
2302 /* Convert page size of 12,14,16 (log2) to 1,2,3 */
2303 cmd.tlbi.tg = (tg - 10) / 2;
2305 /* Determine what level the granule is at */
2306 cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
2308 num_pages = size >> tg;
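/*
 * Worked example for the range-invalidation fields, assuming a 4KiB
 * leaf page size (tg = 12, so cmd.tlbi.tg = (12 - 10) / 2 = 1): for a
 * 2MiB granule, ttl = 4 - ((21 - 3) / (12 - 3)) = 2, i.e. the entries
 * being invalidated live at level 2 of the walk; for a 4KiB granule,
 * ttl = 4 - (9 / 9) = 3.
 */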
2311 while (iova < end) {
2312 if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
2314 * On each iteration of the loop, the range is 5 bits
2315 * worth of the aligned size remaining.
2316 * The range in pages is:
2318 * range = (num_pages & (0x1f << __ffs(num_pages)))
2320 unsigned long scale, num;
2322 /* Determine the power of 2 multiple number of pages */
2323 scale = __ffs(num_pages);
2324 cmd.tlbi.scale = scale;
2326 /* Determine how many chunks of 2^scale size we have */
2327 num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
2328 cmd.tlbi.num = num - 1;
2330 /* range is num * 2^scale * pgsize */
2331 inv_range = num << (scale + tg);
2333 /* Clear out the lower order bits for the next iteration */
2334 num_pages -= num << scale;
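/*
 * E.g. invalidating 3MiB of 4KiB pages gives num_pages = 768
 * (0b11_0000_0000): scale = 8, num = 3, so one command covers
 * 3 * 2^8 pages = 3MiB and num_pages drops to zero - a single
 * iteration for this range.
 */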
2337 cmd.tlbi.addr = iova;
2338 arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
2339 iova += inv_range;
2341 arm_smmu_cmdq_batch_submit(smmu, &cmds);
2344 * Unfortunately, this can't be leaf-only since we may have
2345 * zapped an entire table.
2347 arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
2350 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
2351 unsigned long iova, size_t granule,
2352 void *cookie)
2354 struct arm_smmu_domain *smmu_domain = cookie;
2355 struct iommu_domain *domain = &smmu_domain->domain;
2357 iommu_iotlb_gather_add_page(domain, gather, iova, granule);
2360 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
2361 size_t granule, void *cookie)
2363 arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
2366 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
2367 size_t granule, void *cookie)
2369 arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
2372 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2373 .tlb_flush_all = arm_smmu_tlb_inv_context,
2374 .tlb_flush_walk = arm_smmu_tlb_inv_walk,
2375 .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
2376 .tlb_add_page = arm_smmu_tlb_inv_page_nosync,
2379 /* IOMMU API */
2380 static bool arm_smmu_capable(enum iommu_cap cap)
2382 switch (cap) {
2383 case IOMMU_CAP_CACHE_COHERENCY:
2384 return true;
2385 case IOMMU_CAP_NOEXEC:
2386 return true;
2387 default:
2388 return false;
2392 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2394 struct arm_smmu_domain *smmu_domain;
2396 if (type != IOMMU_DOMAIN_UNMANAGED &&
2397 type != IOMMU_DOMAIN_DMA &&
2398 type != IOMMU_DOMAIN_IDENTITY)
2399 return NULL;
2402 * Allocate the domain and initialise some of its data structures.
2403 * We can't really do anything meaningful until we've added a
2404 * master.
2406 smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2407 if (!smmu_domain)
2408 return NULL;
2410 if (type == IOMMU_DOMAIN_DMA &&
2411 iommu_get_dma_cookie(&smmu_domain->domain)) {
2412 kfree(smmu_domain);
2413 return NULL;
2416 mutex_init(&smmu_domain->init_mutex);
2417 INIT_LIST_HEAD(&smmu_domain->devices);
2418 spin_lock_init(&smmu_domain->devices_lock);
2420 return &smmu_domain->domain;
2423 static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
2425 int idx, size = 1 << span;
2427 do {
2428 idx = find_first_zero_bit(map, size);
2429 if (idx == size)
2430 return -ENOSPC;
2431 } while (test_and_set_bit(idx, map));
2433 return idx;
2436 static void arm_smmu_bitmap_free(unsigned long *map, int idx)
2438 clear_bit(idx, map);
2441 static void arm_smmu_domain_free(struct iommu_domain *domain)
2443 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2444 struct arm_smmu_device *smmu = smmu_domain->smmu;
2446 iommu_put_dma_cookie(domain);
2447 free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2449 /* Free the CD and ASID, if we allocated them */
2450 if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2451 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2453 if (cfg->cdcfg.cdtab) {
2454 arm_smmu_free_cd_tables(smmu_domain);
2455 arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
2457 } else {
2458 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2459 if (cfg->vmid)
2460 arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
2463 kfree(smmu_domain);
2466 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2467 struct arm_smmu_master *master,
2468 struct io_pgtable_cfg *pgtbl_cfg)
2470 int ret;
2471 int asid;
2472 struct arm_smmu_device *smmu = smmu_domain->smmu;
2473 struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
2474 typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2476 asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits);
2477 if (asid < 0)
2478 return asid;
2480 cfg->s1cdmax = master->ssid_bits;
2482 ret = arm_smmu_alloc_cd_tables(smmu_domain);
2483 if (ret)
2484 goto out_free_asid;
2486 cfg->cd.asid = (u16)asid;
2487 cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2488 cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2489 FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2490 FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2491 FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2492 FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2493 FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2494 CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2495 cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
2498 * Note that this will end up calling arm_smmu_sync_cd() before
2499 * the master has been added to the devices list for this domain.
2500 * This isn't an issue because the STE hasn't been installed yet.
2502 ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd);
2503 if (ret)
2504 goto out_free_cd_tables;
2506 return 0;
2508 out_free_cd_tables:
2509 arm_smmu_free_cd_tables(smmu_domain);
2510 out_free_asid:
2511 arm_smmu_bitmap_free(smmu->asid_map, asid);
2512 return ret;
2515 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2516 struct arm_smmu_master *master,
2517 struct io_pgtable_cfg *pgtbl_cfg)
2519 int vmid;
2520 struct arm_smmu_device *smmu = smmu_domain->smmu;
2521 struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2522 typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2524 vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
2525 if (vmid < 0)
2526 return vmid;
2528 vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2529 cfg->vmid = (u16)vmid;
2530 cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2531 cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2532 FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2533 FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2534 FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2535 FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2536 FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2537 FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2538 return 0;
2541 static int arm_smmu_domain_finalise(struct iommu_domain *domain,
2542 struct arm_smmu_master *master)
2544 int ret;
2545 unsigned long ias, oas;
2546 enum io_pgtable_fmt fmt;
2547 struct io_pgtable_cfg pgtbl_cfg;
2548 struct io_pgtable_ops *pgtbl_ops;
2549 int (*finalise_stage_fn)(struct arm_smmu_domain *,
2550 struct arm_smmu_master *,
2551 struct io_pgtable_cfg *);
2552 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2553 struct arm_smmu_device *smmu = smmu_domain->smmu;
2555 if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2556 smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2557 return 0;
2560 /* Restrict the stage to what we can actually support */
2561 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2562 smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2563 if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2564 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2566 switch (smmu_domain->stage) {
2567 case ARM_SMMU_DOMAIN_S1:
2568 ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2569 ias = min_t(unsigned long, ias, VA_BITS);
2570 oas = smmu->ias;
2571 fmt = ARM_64_LPAE_S1;
2572 finalise_stage_fn = arm_smmu_domain_finalise_s1;
2573 break;
2574 case ARM_SMMU_DOMAIN_NESTED:
2575 case ARM_SMMU_DOMAIN_S2:
2576 ias = smmu->ias;
2577 oas = smmu->oas;
2578 fmt = ARM_64_LPAE_S2;
2579 finalise_stage_fn = arm_smmu_domain_finalise_s2;
2580 break;
2581 default:
2582 return -EINVAL;
2585 pgtbl_cfg = (struct io_pgtable_cfg) {
2586 .pgsize_bitmap = smmu->pgsize_bitmap,
2587 .ias = ias,
2588 .oas = oas,
2589 .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
2590 .tlb = &arm_smmu_flush_ops,
2591 .iommu_dev = smmu->dev,
2594 if (smmu_domain->non_strict)
2595 pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
2597 pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2598 if (!pgtbl_ops)
2599 return -ENOMEM;
2601 domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2602 domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2603 domain->geometry.force_aperture = true;
2605 ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
2606 if (ret < 0) {
2607 free_io_pgtable_ops(pgtbl_ops);
2608 return ret;
2611 smmu_domain->pgtbl_ops = pgtbl_ops;
2612 return 0;
2615 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2617 __le64 *step;
2618 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2620 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2621 struct arm_smmu_strtab_l1_desc *l1_desc;
2622 int idx;
2624 /* Two-level walk */
2625 idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2626 l1_desc = &cfg->l1_desc[idx];
2627 idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
2628 step = &l1_desc->l2ptr[idx];
2629 } else {
2630 /* Simple linear lookup */
2631 step = &cfg->strtab[sid * STRTAB_STE_DWORDS];
2634 return step;
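/*
 * Two-level example of the lookup above, assuming the STRTAB_SPLIT of 8
 * and 8-dword STEs used by this driver: for sid 0x1234 the level-1
 * descriptor is l1_desc[0x12] and the STE starts at dword offset
 * 0x34 * 8 into that descriptor's level-2 table.
 */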
2637 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2639 int i, j;
2640 struct arm_smmu_device *smmu = master->smmu;
2642 for (i = 0; i < master->num_sids; ++i) {
2643 u32 sid = master->sids[i];
2644 __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
2646 /* Bridged PCI devices may end up with duplicated IDs */
2647 for (j = 0; j < i; j++)
2648 if (master->sids[j] == sid)
2649 break;
2650 if (j < i)
2651 continue;
2653 arm_smmu_write_strtab_ent(master, sid, step);
2657 #ifdef CONFIG_PCI_ATS
2658 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2660 struct pci_dev *pdev;
2661 struct arm_smmu_device *smmu = master->smmu;
2662 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2664 if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
2665 !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
2666 return false;
2668 pdev = to_pci_dev(master->dev);
2669 return !pdev->untrusted && pdev->ats_cap;
2671 #else
2672 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2674 return false;
2676 #endif
2678 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2680 size_t stu;
2681 struct pci_dev *pdev;
2682 struct arm_smmu_device *smmu = master->smmu;
2683 struct arm_smmu_domain *smmu_domain = master->domain;
2685 /* Don't enable ATS at the endpoint if it's not enabled in the STE */
2686 if (!master->ats_enabled)
2687 return;
2689 /* Smallest Translation Unit: log2 of the smallest supported granule */
2690 stu = __ffs(smmu->pgsize_bitmap);
2691 pdev = to_pci_dev(master->dev);
2693 atomic_inc(&smmu_domain->nr_ats_masters);
2694 arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
2695 if (pci_enable_ats(pdev, stu))
2696 dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2699 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2701 struct arm_smmu_domain *smmu_domain = master->domain;
2703 if (!master->ats_enabled)
2704 return;
2706 pci_disable_ats(to_pci_dev(master->dev));
2708 * Ensure ATS is disabled at the endpoint before we issue the
2709 * ATC invalidation via the SMMU.
2711 wmb();
2712 arm_smmu_atc_inv_master(master);
2713 atomic_dec(&smmu_domain->nr_ats_masters);
2716 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2718 int ret;
2719 int features;
2720 int num_pasids;
2721 struct pci_dev *pdev;
2723 if (!dev_is_pci(master->dev))
2724 return -ENODEV;
2726 pdev = to_pci_dev(master->dev);
2728 features = pci_pasid_features(pdev);
2729 if (features < 0)
2730 return features;
2732 num_pasids = pci_max_pasids(pdev);
2733 if (num_pasids <= 0)
2734 return num_pasids;
2736 ret = pci_enable_pasid(pdev, features);
2737 if (ret) {
2738 dev_err(&pdev->dev, "Failed to enable PASID\n");
2739 return ret;
2742 master->ssid_bits = min_t(u8, ilog2(num_pasids),
2743 master->smmu->ssid_bits);
2744 return 0;
2747 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2749 struct pci_dev *pdev;
2751 if (!dev_is_pci(master->dev))
2752 return;
2754 pdev = to_pci_dev(master->dev);
2756 if (!pdev->pasid_enabled)
2757 return;
2759 master->ssid_bits = 0;
2760 pci_disable_pasid(pdev);
2763 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2765 unsigned long flags;
2766 struct arm_smmu_domain *smmu_domain = master->domain;
2768 if (!smmu_domain)
2769 return;
2771 arm_smmu_disable_ats(master);
2773 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2774 list_del(&master->domain_head);
2775 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2777 master->domain = NULL;
2778 master->ats_enabled = false;
2779 arm_smmu_install_ste_for_dev(master);
2782 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2784 int ret = 0;
2785 unsigned long flags;
2786 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2787 struct arm_smmu_device *smmu;
2788 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2789 struct arm_smmu_master *master;
2791 if (!fwspec)
2792 return -ENOENT;
2794 master = dev_iommu_priv_get(dev);
2795 smmu = master->smmu;
2797 arm_smmu_detach_dev(master);
2799 mutex_lock(&smmu_domain->init_mutex);
2801 if (!smmu_domain->smmu) {
2802 smmu_domain->smmu = smmu;
2803 ret = arm_smmu_domain_finalise(domain, master);
2804 if (ret) {
2805 smmu_domain->smmu = NULL;
2806 goto out_unlock;
2808 } else if (smmu_domain->smmu != smmu) {
2809 dev_err(dev,
2810 "cannot attach to SMMU %s (upstream of %s)\n",
2811 dev_name(smmu_domain->smmu->dev),
2812 dev_name(smmu->dev));
2813 ret = -ENXIO;
2814 goto out_unlock;
2815 } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
2816 master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
2817 dev_err(dev,
2818 "cannot attach to incompatible domain (%u SSID bits != %u)\n",
2819 smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
2820 ret = -EINVAL;
2821 goto out_unlock;
2824 master->domain = smmu_domain;
2826 if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2827 master->ats_enabled = arm_smmu_ats_supported(master);
2829 arm_smmu_install_ste_for_dev(master);
2831 spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2832 list_add(&master->domain_head, &smmu_domain->devices);
2833 spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2835 arm_smmu_enable_ats(master);
2837 out_unlock:
2838 mutex_unlock(&smmu_domain->init_mutex);
2839 return ret;
2842 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
2843 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
2845 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2847 if (!ops)
2848 return -ENODEV;
2850 return ops->map(ops, iova, paddr, size, prot);
2853 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
2854 size_t size, struct iommu_iotlb_gather *gather)
2856 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2857 struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2859 if (!ops)
2860 return 0;
2862 return ops->unmap(ops, iova, size, gather);
2865 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2867 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2869 if (smmu_domain->smmu)
2870 arm_smmu_tlb_inv_context(smmu_domain);
2873 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2874 struct iommu_iotlb_gather *gather)
2876 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2878 arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
2879 gather->pgsize, true, smmu_domain);
2882 static phys_addr_t
2883 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2885 struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2887 if (domain->type == IOMMU_DOMAIN_IDENTITY)
2888 return iova;
2890 if (!ops)
2891 return 0;
2893 return ops->iova_to_phys(ops, iova);
2896 static struct platform_driver arm_smmu_driver;
2898 static
2899 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2901 struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2902 fwnode);
2903 put_device(dev);
2904 return dev ? dev_get_drvdata(dev) : NULL;
2907 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2909 unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2911 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2912 limit *= 1UL << STRTAB_SPLIT;
2914 return sid < limit;
2917 static struct iommu_ops arm_smmu_ops;
2919 static int arm_smmu_add_device(struct device *dev)
2921 int i, ret;
2922 struct arm_smmu_device *smmu;
2923 struct arm_smmu_master *master;
2924 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2925 struct iommu_group *group;
2927 if (!fwspec || fwspec->ops != &arm_smmu_ops)
2928 return -ENODEV;
2930 if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2931 return -EBUSY;
2933 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2934 if (!smmu)
2935 return -ENODEV;
2937 master = kzalloc(sizeof(*master), GFP_KERNEL);
2938 if (!master)
2939 return -ENOMEM;
2941 master->dev = dev;
2942 master->smmu = smmu;
2943 master->sids = fwspec->ids;
2944 master->num_sids = fwspec->num_ids;
2945 dev_iommu_priv_set(dev, master);
2947 /* Check the SIDs are in range of the SMMU and our stream table */
2948 for (i = 0; i < master->num_sids; i++) {
2949 u32 sid = master->sids[i];
2951 if (!arm_smmu_sid_in_range(smmu, sid)) {
2952 ret = -ERANGE;
2953 goto err_free_master;
2956 /* Ensure l2 strtab is initialised */
2957 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2958 ret = arm_smmu_init_l2_strtab(smmu, sid);
2959 if (ret)
2960 goto err_free_master;
2964 master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);
2967 * Note that PASID must be enabled before, and disabled after ATS:
2968 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2970 * Behavior is undefined if this bit is Set and the value of the PASID
2971 * Enable, Execute Requested Enable, or Privileged Mode Requested bits
2972 * are changed.
2974 arm_smmu_enable_pasid(master);
2976 if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2977 master->ssid_bits = min_t(u8, master->ssid_bits,
2978 CTXDESC_LINEAR_CDMAX);
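/*
 * Without 2-level CD tables the whole context-descriptor table for a
 * master must be a single contiguous allocation, so the number of SSID
 * bits is further capped here to keep that linear table to a modest
 * size.
 */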
2980 ret = iommu_device_link(&smmu->iommu, dev);
2981 if (ret)
2982 goto err_disable_pasid;
2984 group = iommu_group_get_for_dev(dev);
2985 if (IS_ERR(group)) {
2986 ret = PTR_ERR(group);
2987 goto err_unlink;
2990 iommu_group_put(group);
2991 return 0;
2993 err_unlink:
2994 iommu_device_unlink(&smmu->iommu, dev);
2995 err_disable_pasid:
2996 arm_smmu_disable_pasid(master);
2997 err_free_master:
2998 kfree(master);
2999 dev_iommu_priv_set(dev, NULL);
3000 return ret;
3003 static void arm_smmu_remove_device(struct device *dev)
3005 struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
3006 struct arm_smmu_master *master;
3007 struct arm_smmu_device *smmu;
3009 if (!fwspec || fwspec->ops != &arm_smmu_ops)
3010 return;
3012 master = dev_iommu_priv_get(dev);
3013 smmu = master->smmu;
3014 arm_smmu_detach_dev(master);
3015 iommu_group_remove_device(dev);
3016 iommu_device_unlink(&smmu->iommu, dev);
3017 arm_smmu_disable_pasid(master);
3018 kfree(master);
3019 iommu_fwspec_free(dev);
3022 static struct iommu_group *arm_smmu_device_group(struct device *dev)
3024 struct iommu_group *group;
3027 * We don't support devices sharing stream IDs other than PCI RID
3028 * aliases, since the necessary ID-to-device lookup becomes rather
3029 * impractical given a potential sparse 32-bit stream ID space.
3031 if (dev_is_pci(dev))
3032 group = pci_device_group(dev);
3033 else
3034 group = generic_device_group(dev);
3036 return group;
3039 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
3040 enum iommu_attr attr, void *data)
3042 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3044 switch (domain->type) {
3045 case IOMMU_DOMAIN_UNMANAGED:
3046 switch (attr) {
3047 case DOMAIN_ATTR_NESTING:
3048 *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
3049 return 0;
3050 default:
3051 return -ENODEV;
3053 break;
3054 case IOMMU_DOMAIN_DMA:
3055 switch (attr) {
3056 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3057 *(int *)data = smmu_domain->non_strict;
3058 return 0;
3059 default:
3060 return -ENODEV;
3062 break;
3063 default:
3064 return -EINVAL;
3068 static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
3069 enum iommu_attr attr, void *data)
3071 int ret = 0;
3072 struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
3074 mutex_lock(&smmu_domain->init_mutex);
3076 switch (domain->type) {
3077 case IOMMU_DOMAIN_UNMANAGED:
3078 switch (attr) {
3079 case DOMAIN_ATTR_NESTING:
3080 if (smmu_domain->smmu) {
3081 ret = -EPERM;
3082 goto out_unlock;
3085 if (*(int *)data)
3086 smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
3087 else
3088 smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
3089 break;
3090 default:
3091 ret = -ENODEV;
3093 break;
3094 case IOMMU_DOMAIN_DMA:
3095 switch(attr) {
3096 case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
3097 smmu_domain->non_strict = *(int *)data;
3098 break;
3099 default:
3100 ret = -ENODEV;
3102 break;
3103 default:
3104 ret = -EINVAL;
3107 out_unlock:
3108 mutex_unlock(&smmu_domain->init_mutex);
3109 return ret;
3112 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
3114 return iommu_fwspec_add_ids(dev, args->args, 1);
3117 static void arm_smmu_get_resv_regions(struct device *dev,
3118 struct list_head *head)
3120 struct iommu_resv_region *region;
3121 int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
3123 region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
3124 prot, IOMMU_RESV_SW_MSI);
3125 if (!region)
3126 return;
3128 list_add_tail(&region->list, head);
3130 iommu_dma_get_resv_regions(dev, head);
3133 static struct iommu_ops arm_smmu_ops = {
3134 .capable = arm_smmu_capable,
3135 .domain_alloc = arm_smmu_domain_alloc,
3136 .domain_free = arm_smmu_domain_free,
3137 .attach_dev = arm_smmu_attach_dev,
3138 .map = arm_smmu_map,
3139 .unmap = arm_smmu_unmap,
3140 .flush_iotlb_all = arm_smmu_flush_iotlb_all,
3141 .iotlb_sync = arm_smmu_iotlb_sync,
3142 .iova_to_phys = arm_smmu_iova_to_phys,
3143 .add_device = arm_smmu_add_device,
3144 .remove_device = arm_smmu_remove_device,
3145 .device_group = arm_smmu_device_group,
3146 .domain_get_attr = arm_smmu_domain_get_attr,
3147 .domain_set_attr = arm_smmu_domain_set_attr,
3148 .of_xlate = arm_smmu_of_xlate,
3149 .get_resv_regions = arm_smmu_get_resv_regions,
3150 .put_resv_regions = generic_iommu_put_resv_regions,
3151 .pgsize_bitmap = -1UL, /* Restricted during device attach */
3154 /* Probing and initialisation functions */
3155 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
3156 struct arm_smmu_queue *q,
3157 unsigned long prod_off,
3158 unsigned long cons_off,
3159 size_t dwords, const char *name)
3161 size_t qsz;
3163 do {
3164 qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
3165 q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
3166 GFP_KERNEL);
3167 if (q->base || qsz < PAGE_SIZE)
3168 break;
3170 q->llq.max_n_shift--;
3171 } while (1);
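/*
 * The loop above keeps halving the queue (one step of max_n_shift per
 * attempt) while the DMA allocation fails, but gives up once the failed
 * request is already smaller than a page: at that point the failure is
 * genuine rather than a symptom of asking for too much contiguous
 * memory.
 */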
3173 if (!q->base) {
3174 dev_err(smmu->dev,
3175 "failed to allocate queue (0x%zx bytes) for %s\n",
3176 qsz, name);
3177 return -ENOMEM;
3180 if (!WARN_ON(q->base_dma & (qsz - 1))) {
3181 dev_info(smmu->dev, "allocated %u entries for %s\n",
3182 1 << q->llq.max_n_shift, name);
3185 q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
3186 q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
3187 q->ent_dwords = dwords;
3189 q->q_base = Q_BASE_RWA;
3190 q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
3191 q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
3193 q->llq.prod = q->llq.cons = 0;
3194 return 0;
3197 static void arm_smmu_cmdq_free_bitmap(void *data)
3199 unsigned long *bitmap = data;
3200 bitmap_free(bitmap);
3203 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
3205 int ret = 0;
3206 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
3207 unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
3208 atomic_long_t *bitmap;
3210 atomic_set(&cmdq->owner_prod, 0);
3211 atomic_set(&cmdq->lock, 0);
3213 bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
3214 if (!bitmap) {
3215 dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
3216 ret = -ENOMEM;
3217 } else {
3218 cmdq->valid_map = bitmap;
3219 devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
3222 return ret;
3225 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
3227 int ret;
3229 /* cmdq */
3230 ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
3231 ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
3232 "cmdq");
3233 if (ret)
3234 return ret;
3236 ret = arm_smmu_cmdq_init(smmu);
3237 if (ret)
3238 return ret;
3240 /* evtq */
3241 ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
3242 ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
3243 "evtq");
3244 if (ret)
3245 return ret;
3247 /* priq */
3248 if (!(smmu->features & ARM_SMMU_FEAT_PRI))
3249 return 0;
3251 return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD,
3252 ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS,
3253 "priq");
3256 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
3258 unsigned int i;
3259 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3260 size_t size = sizeof(*cfg->l1_desc) * cfg->num_l1_ents;
3261 void *strtab = smmu->strtab_cfg.strtab;
3263 cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
3264 if (!cfg->l1_desc) {
3265 dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
3266 return -ENOMEM;
3269 for (i = 0; i < cfg->num_l1_ents; ++i) {
3270 arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
3271 strtab += STRTAB_L1_DESC_DWORDS << 3;
3274 return 0;
3277 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
3279 void *strtab;
3280 u64 reg;
3281 u32 size, l1size;
3282 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3284 /* Calculate the L1 size, capped to the SIDSIZE. */
3285 size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
3286 size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3287 cfg->num_l1_ents = 1 << size;
3289 size += STRTAB_SPLIT;
3290 if (size < smmu->sid_bits)
3291 dev_warn(smmu->dev,
3292 "2-level strtab only covers %u/%u bits of SID\n",
3293 size, smmu->sid_bits);
3295 l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
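/*
 * For example, a 16-bit StreamID space with a STRTAB_SPLIT of 8 gives
 * 256 level-1 descriptors here (l1size = 2KiB); the 16KiB level-2
 * tables, each covering 256 SIDs, are then allocated lazily by
 * arm_smmu_init_l2_strtab() as masters show up.
 */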
3296 strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3297 GFP_KERNEL);
3298 if (!strtab) {
3299 dev_err(smmu->dev,
3300 "failed to allocate l1 stream table (%u bytes)\n",
3301 l1size);
3302 return -ENOMEM;
3304 cfg->strtab = strtab;
3306 /* Configure strtab_base_cfg for 2 levels */
3307 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3308 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3309 reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3310 cfg->strtab_base_cfg = reg;
3312 return arm_smmu_init_l1_strtab(smmu);
3315 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3317 void *strtab;
3318 u64 reg;
3319 u32 size;
3320 struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3322 size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3323 strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3324 GFP_KERNEL);
3325 if (!strtab) {
3326 dev_err(smmu->dev,
3327 "failed to allocate linear stream table (%u bytes)\n",
3328 size);
3329 return -ENOMEM;
3331 cfg->strtab = strtab;
3332 cfg->num_l1_ents = 1 << smmu->sid_bits;
3334 /* Configure strtab_base_cfg for a linear table covering all SIDs */
3335 reg = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3336 reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3337 cfg->strtab_base_cfg = reg;
3339 arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
3340 return 0;
3343 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3345 u64 reg;
3346 int ret;
3348 if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3349 ret = arm_smmu_init_strtab_2lvl(smmu);
3350 else
3351 ret = arm_smmu_init_strtab_linear(smmu);
3353 if (ret)
3354 return ret;
3356 /* Set the strtab base address */
3357 reg = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3358 reg |= STRTAB_BASE_RA;
3359 smmu->strtab_cfg.strtab_base = reg;
3361 /* Allocate the first VMID for stage-2 bypass STEs */
3362 set_bit(0, smmu->vmid_map);
3363 return 0;
3366 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3368 int ret;
3370 ret = arm_smmu_init_queues(smmu);
3371 if (ret)
3372 return ret;
3374 return arm_smmu_init_strtab(smmu);
3377 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3378 unsigned int reg_off, unsigned int ack_off)
3380 u32 reg;
3382 writel_relaxed(val, smmu->base + reg_off);
3383 return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3384 1, ARM_SMMU_POLL_TIMEOUT_US);
3387 /* GBPA is "special" */
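/*
 * "Special" because, unlike CR0 and IRQ_CTRL, GBPA has no companion ACK
 * register: software waits for GBPA.UPDATE to read back as zero, writes
 * the new value with UPDATE set, and then polls again until the SMMU
 * clears UPDATE to signal that the new global bypass attributes have
 * taken effect.
 */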
3388 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3390 int ret;
3391 u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3393 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3394 1, ARM_SMMU_POLL_TIMEOUT_US);
3395 if (ret)
3396 return ret;
3398 reg &= ~clr;
3399 reg |= set;
3400 writel_relaxed(reg | GBPA_UPDATE, gbpa);
3401 ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3402 1, ARM_SMMU_POLL_TIMEOUT_US);
3404 if (ret)
3405 dev_err(smmu->dev, "GBPA not responding to update\n");
3406 return ret;
3409 static void arm_smmu_free_msis(void *data)
3411 struct device *dev = data;
3412 platform_msi_domain_free_irqs(dev);
3415 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3417 phys_addr_t doorbell;
3418 struct device *dev = msi_desc_to_dev(desc);
3419 struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3420 phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
3422 doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3423 doorbell &= MSI_CFG0_ADDR_MASK;
3425 writeq_relaxed(doorbell, smmu->base + cfg[0]);
3426 writel_relaxed(msg->data, smmu->base + cfg[1]);
3427 writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3430 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3432 struct msi_desc *desc;
3433 int ret, nvec = ARM_SMMU_MAX_MSIS;
3434 struct device *dev = smmu->dev;
3436 /* Clear the MSI address regs */
3437 writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3438 writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3440 if (smmu->features & ARM_SMMU_FEAT_PRI)
3441 writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3442 else
3443 nvec--;
3445 if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3446 return;
3448 if (!dev->msi_domain) {
3449 dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3450 return;
3453 /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3454 ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3455 if (ret) {
3456 dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3457 return;
3460 for_each_msi_entry(desc, dev) {
3461 switch (desc->platform.msi_index) {
3462 case EVTQ_MSI_INDEX:
3463 smmu->evtq.q.irq = desc->irq;
3464 break;
3465 case GERROR_MSI_INDEX:
3466 smmu->gerr_irq = desc->irq;
3467 break;
3468 case PRIQ_MSI_INDEX:
3469 smmu->priq.q.irq = desc->irq;
3470 break;
3471 default: /* Unknown */
3472 continue;
3476 /* Add callback to free MSIs on teardown */
3477 devm_add_action(dev, arm_smmu_free_msis, dev);
3480 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3482 int irq, ret;
3484 arm_smmu_setup_msis(smmu);
3486 /* Request interrupt lines */
3487 irq = smmu->evtq.q.irq;
3488 if (irq) {
3489 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3490 arm_smmu_evtq_thread,
3491 IRQF_ONESHOT,
3492 "arm-smmu-v3-evtq", smmu);
3493 if (ret < 0)
3494 dev_warn(smmu->dev, "failed to enable evtq irq\n");
3495 } else {
3496 dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3499 irq = smmu->gerr_irq;
3500 if (irq) {
3501 ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3502 0, "arm-smmu-v3-gerror", smmu);
3503 if (ret < 0)
3504 dev_warn(smmu->dev, "failed to enable gerror irq\n");
3505 } else {
3506 dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3509 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3510 irq = smmu->priq.q.irq;
3511 if (irq) {
3512 ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3513 arm_smmu_priq_thread,
3514 IRQF_ONESHOT,
3515 "arm-smmu-v3-priq",
3516 smmu);
3517 if (ret < 0)
3518 dev_warn(smmu->dev,
3519 "failed to enable priq irq\n");
3520 } else {
3521 dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3526 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3528 int ret, irq;
3529 u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3531 /* Disable IRQs first */
3532 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3533 ARM_SMMU_IRQ_CTRLACK);
3534 if (ret) {
3535 dev_err(smmu->dev, "failed to disable irqs\n");
3536 return ret;
3539 irq = smmu->combined_irq;
3540 if (irq) {
3542 * Cavium ThunderX2 implementation doesn't support unique irq
3543 * lines. Use a single irq line for all the SMMUv3 interrupts.
3545 ret = devm_request_threaded_irq(smmu->dev, irq,
3546 arm_smmu_combined_irq_handler,
3547 arm_smmu_combined_irq_thread,
3548 IRQF_ONESHOT,
3549 "arm-smmu-v3-combined-irq", smmu);
3550 if (ret < 0)
3551 dev_warn(smmu->dev, "failed to enable combined irq\n");
3552 } else
3553 arm_smmu_setup_unique_irqs(smmu);
3555 if (smmu->features & ARM_SMMU_FEAT_PRI)
3556 irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3558 /* Enable interrupt generation on the SMMU */
3559 ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3560 ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3561 if (ret)
3562 dev_warn(smmu->dev, "failed to enable irqs\n");
3564 return 0;
3567 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3569 int ret;
3571 ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3572 if (ret)
3573 dev_err(smmu->dev, "failed to clear cr0\n");
3575 return ret;
3578 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3580 int ret;
3581 u32 reg, enables;
3582 struct arm_smmu_cmdq_ent cmd;
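/*
 * The bring-up sequence below is, in order: make sure the SMMU is
 * disabled (forcing GBPA to abort first if it was left enabled),
 * program the table/queue memory attributes (CR1) and CR2, install the
 * stream table base, then enable the command queue and use it to
 * invalidate any cached configuration and TLB state before the event
 * queue, PRI queue, ATS checking and interrupts are switched on.
 * Translation (or bypass) is only enabled at the very end.
 */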
3584 /* Clear CR0 and sync (disables SMMU and queue processing) */
3585 reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3586 if (reg & CR0_SMMUEN) {
3587 dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3588 WARN_ON(is_kdump_kernel() && !disable_bypass);
3589 arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3592 ret = arm_smmu_device_disable(smmu);
3593 if (ret)
3594 return ret;
3596 /* CR1 (table and queue memory attributes) */
3597 reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3598 FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3599 FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3600 FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3601 FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3602 FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3603 writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3605 /* CR2 (random crap) */
3606 reg = CR2_PTM | CR2_RECINVSID | CR2_E2H;
3607 writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3609 /* Stream table */
3610 writeq_relaxed(smmu->strtab_cfg.strtab_base,
3611 smmu->base + ARM_SMMU_STRTAB_BASE);
3612 writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3613 smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3615 /* Command queue */
3616 writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3617 writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3618 writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3620 enables = CR0_CMDQEN;
3621 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3622 ARM_SMMU_CR0ACK);
3623 if (ret) {
3624 dev_err(smmu->dev, "failed to enable command queue\n");
3625 return ret;
3628 /* Invalidate any cached configuration */
3629 cmd.opcode = CMDQ_OP_CFGI_ALL;
3630 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3631 arm_smmu_cmdq_issue_sync(smmu);
3633 /* Invalidate any stale TLB entries */
3634 if (smmu->features & ARM_SMMU_FEAT_HYP) {
3635 cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3636 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3639 cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3640 arm_smmu_cmdq_issue_cmd(smmu, &cmd);
3641 arm_smmu_cmdq_issue_sync(smmu);
3643 /* Event queue */
3644 writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3645 writel_relaxed(smmu->evtq.q.llq.prod,
3646 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
3647 writel_relaxed(smmu->evtq.q.llq.cons,
3648 arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
3650 enables |= CR0_EVTQEN;
3651 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3652 ARM_SMMU_CR0ACK);
3653 if (ret) {
3654 dev_err(smmu->dev, "failed to enable event queue\n");
3655 return ret;
3658 /* PRI queue */
3659 if (smmu->features & ARM_SMMU_FEAT_PRI) {
3660 writeq_relaxed(smmu->priq.q.q_base,
3661 smmu->base + ARM_SMMU_PRIQ_BASE);
3662 writel_relaxed(smmu->priq.q.llq.prod,
3663 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
3664 writel_relaxed(smmu->priq.q.llq.cons,
3665 arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
3667 enables |= CR0_PRIQEN;
3668 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3669 ARM_SMMU_CR0ACK);
3670 if (ret) {
3671 dev_err(smmu->dev, "failed to enable PRI queue\n");
3672 return ret;
3676 if (smmu->features & ARM_SMMU_FEAT_ATS) {
3677 enables |= CR0_ATSCHK;
3678 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3679 ARM_SMMU_CR0ACK);
3680 if (ret) {
3681 dev_err(smmu->dev, "failed to enable ATS check\n");
3682 return ret;
3686 ret = arm_smmu_setup_irqs(smmu);
3687 if (ret) {
3688 dev_err(smmu->dev, "failed to setup irqs\n");
3689 return ret;
3692 if (is_kdump_kernel())
3693 enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3695 /* Enable the SMMU interface, or ensure bypass */
3696 if (!bypass || disable_bypass) {
3697 enables |= CR0_SMMUEN;
3698 } else {
3699 ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3700 if (ret)
3701 return ret;
3703 ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3704 ARM_SMMU_CR0ACK);
3705 if (ret) {
3706 dev_err(smmu->dev, "failed to enable SMMU interface\n");
3707 return ret;
3710 return 0;
3713 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3715 u32 reg;
3716 bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3718 /* IDR0 */
3719 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3721 /* 2-level structures */
3722 if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3723 smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3725 if (reg & IDR0_CD2L)
3726 smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3729 * Translation table endianness.
3730 * We currently require the same endianness as the CPU, but this
3731 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3733 switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3734 case IDR0_TTENDIAN_MIXED:
3735 smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3736 break;
3737 #ifdef __BIG_ENDIAN
3738 case IDR0_TTENDIAN_BE:
3739 smmu->features |= ARM_SMMU_FEAT_TT_BE;
3740 break;
3741 #else
3742 case IDR0_TTENDIAN_LE:
3743 smmu->features |= ARM_SMMU_FEAT_TT_LE;
3744 break;
3745 #endif
3746 default:
3747 dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3748 return -ENXIO;
3751 /* Boolean feature flags */
3752 if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3753 smmu->features |= ARM_SMMU_FEAT_PRI;
3755 if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3756 smmu->features |= ARM_SMMU_FEAT_ATS;
3758 if (reg & IDR0_SEV)
3759 smmu->features |= ARM_SMMU_FEAT_SEV;
3761 if (reg & IDR0_MSI)
3762 smmu->features |= ARM_SMMU_FEAT_MSI;
3764 if (reg & IDR0_HYP)
3765 smmu->features |= ARM_SMMU_FEAT_HYP;
3768 * The coherency feature as set by FW is used in preference to the ID
3769 * register, but warn on mismatch.
3771 if (!!(reg & IDR0_COHACC) != coherent)
3772 dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3773 coherent ? "true" : "false");
3775 switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3776 case IDR0_STALL_MODEL_FORCE:
3777 smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3778 /* Fallthrough */
3779 case IDR0_STALL_MODEL_STALL:
3780 smmu->features |= ARM_SMMU_FEAT_STALLS;
3783 if (reg & IDR0_S1P)
3784 smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3786 if (reg & IDR0_S2P)
3787 smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3789 if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3790 dev_err(smmu->dev, "no translation support!\n");
3791 return -ENXIO;
3794 /* We only support the AArch64 table format at present */
3795 switch (FIELD_GET(IDR0_TTF, reg)) {
3796 case IDR0_TTF_AARCH32_64:
3797 smmu->ias = 40;
3798 /* Fallthrough */
3799 case IDR0_TTF_AARCH64:
3800 break;
3801 default:
3802 dev_err(smmu->dev, "AArch64 table format not supported!\n");
3803 return -ENXIO;
3806 /* ASID/VMID sizes */
3807 smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3808 smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3810 /* IDR1 */
3811 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3812 if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3813 dev_err(smmu->dev, "embedded implementation not supported\n");
3814 return -ENXIO;
3817 /* Queue sizes, capped to ensure natural alignment */
3818 smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3819 FIELD_GET(IDR1_CMDQS, reg));
3820 if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3822 * We don't support splitting up batches, so one batch of
3823 * commands plus an extra sync needs to fit inside the command
3824 * queue. There's also no way we can handle the weird alignment
3825 * restrictions on the base pointer for a unit-length queue.
3827 dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3828 CMDQ_BATCH_ENTRIES);
3829 return -ENXIO;
3832 smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3833 FIELD_GET(IDR1_EVTQS, reg));
3834 smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3835 FIELD_GET(IDR1_PRIQS, reg));
3837 /* SID/SSID sizes */
3838 smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3839 smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3842 * If the SMMU supports fewer bits than would fill a single L2 stream
3843 * table, use a linear table instead.
3845 if (smmu->sid_bits <= STRTAB_SPLIT)
3846 smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3848 /* IDR3 */
3849 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3850 if (FIELD_GET(IDR3_RIL, reg))
3851 smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3853 /* IDR5 */
3854 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3856 /* Maximum number of outstanding stalls */
3857 smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3859 /* Page sizes */
3860 if (reg & IDR5_GRAN64K)
3861 smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3862 if (reg & IDR5_GRAN16K)
3863 smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3864 if (reg & IDR5_GRAN4K)
3865 smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3867 /* Input address size */
3868 if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3869 smmu->features |= ARM_SMMU_FEAT_VAX;
3871 /* Output address size */
3872 switch (FIELD_GET(IDR5_OAS, reg)) {
3873 case IDR5_OAS_32_BIT:
3874 smmu->oas = 32;
3875 break;
3876 case IDR5_OAS_36_BIT:
3877 smmu->oas = 36;
3878 break;
3879 case IDR5_OAS_40_BIT:
3880 smmu->oas = 40;
3881 break;
3882 case IDR5_OAS_42_BIT:
3883 smmu->oas = 42;
3884 break;
3885 case IDR5_OAS_44_BIT:
3886 smmu->oas = 44;
3887 break;
3888 case IDR5_OAS_52_BIT:
3889 smmu->oas = 52;
3890 smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3891 break;
3892 default:
3893 dev_info(smmu->dev,
3894 "unknown output address size. Truncating to 48-bit\n");
3895 /* Fallthrough */
3896 case IDR5_OAS_48_BIT:
3897 smmu->oas = 48;
3900 if (arm_smmu_ops.pgsize_bitmap == -1UL)
3901 arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3902 else
3903 arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3905 /* Set the DMA mask for our table walker */
3906 if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3907 dev_warn(smmu->dev,
3908 "failed to set DMA mask for table walker\n");
3910 smmu->ias = max(smmu->ias, smmu->oas);
3912 dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3913 smmu->ias, smmu->oas, smmu->features);
3914 return 0;
3917 #ifdef CONFIG_ACPI
3918 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3920 switch (model) {
3921 case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3922 smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3923 break;
3924 case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3925 smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3926 break;
3929 dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3932 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3933 struct arm_smmu_device *smmu)
3935 struct acpi_iort_smmu_v3 *iort_smmu;
3936 struct device *dev = smmu->dev;
3937 struct acpi_iort_node *node;
3939 node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3941 /* Retrieve SMMUv3 specific data */
3942 iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3944 acpi_smmu_get_options(iort_smmu->model, smmu);
3946 if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3947 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3949 return 0;
3951 #else
3952 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3953 struct arm_smmu_device *smmu)
3955 return -ENODEV;
3957 #endif
3959 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3960 struct arm_smmu_device *smmu)
3962 struct device *dev = &pdev->dev;
3963 u32 cells;
3964 int ret = -EINVAL;
3966 if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3967 dev_err(dev, "missing #iommu-cells property\n");
3968 else if (cells != 1)
3969 dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3970 else
3971 ret = 0;
3973 parse_driver_options(smmu);
3975 if (of_dma_is_coherent(dev->of_node))
3976 smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3978 return ret;
3981 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3983 if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3984 return SZ_64K;
3985 else
3986 return SZ_128K;
3989 static int arm_smmu_set_bus_ops(struct iommu_ops *ops)
3991 int err;
3993 #ifdef CONFIG_PCI
3994 if (pci_bus_type.iommu_ops != ops) {
3995 err = bus_set_iommu(&pci_bus_type, ops);
3996 if (err)
3997 return err;
3999 #endif
4000 #ifdef CONFIG_ARM_AMBA
4001 if (amba_bustype.iommu_ops != ops) {
4002 err = bus_set_iommu(&amba_bustype, ops);
4003 if (err)
4004 goto err_reset_pci_ops;
4005 }
4006 #endif
4007 if (platform_bus_type.iommu_ops != ops) {
4008 err = bus_set_iommu(&platform_bus_type, ops);
4009 if (err)
4010 goto err_reset_amba_ops;
4011 }
4013 return 0;
4015 err_reset_amba_ops:
4016 #ifdef CONFIG_ARM_AMBA
4017 bus_set_iommu(&amba_bustype, NULL);
4018 #endif
4019 err_reset_pci_ops: __maybe_unused;
4020 #ifdef CONFIG_PCI
4021 bus_set_iommu(&pci_bus_type, NULL);
4022 #endif
4023 return err;
4024 }
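/*
 * Map a sub-region of the MMIO space by hand-rolling a struct resource for
 * devm_ioremap_resource(), rather than mapping the platform resource whole.
 */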
4026 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
4027 resource_size_t size)
4028 {
4029 struct resource res = {
4030 .flags = IORESOURCE_MEM,
4031 .start = start,
4032 .end = start + size - 1,
4033 };
4035 return devm_ioremap_resource(dev, &res);
4036 }
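/*
 * Probe order: firmware description (DT or ACPI), register mapping, wired
 * interrupts, hardware feature probe, in-memory structures, device reset,
 * then registration with the IOMMU core and the bus types.
 */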
4038 static int arm_smmu_device_probe(struct platform_device *pdev)
4039 {
4040 int irq, ret;
4041 struct resource *res;
4042 resource_size_t ioaddr;
4043 struct arm_smmu_device *smmu;
4044 struct device *dev = &pdev->dev;
4045 bool bypass;
4047 smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
4048 if (!smmu) {
4049 dev_err(dev, "failed to allocate arm_smmu_device\n");
4050 return -ENOMEM;
4051 }
4052 smmu->dev = dev;
4054 if (dev->of_node) {
4055 ret = arm_smmu_device_dt_probe(pdev, smmu);
4056 } else {
4057 ret = arm_smmu_device_acpi_probe(pdev, smmu);
4058 if (ret == -ENODEV)
4059 return ret;
4060 }
4062 /* Set bypass mode according to firmware probing result */
4063 bypass = !!ret;
4065 /* Base address */
4066 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
4067 if (resource_size(res) < arm_smmu_resource_size(smmu)) {
4068 dev_err(dev, "MMIO region too small (%pr)\n", res);
4069 return -EINVAL;
4070 }
4071 ioaddr = res->start;
4073 /*
4074 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
4075 * the PMCG registers which are reserved by the PMU driver.
4076 */
4077 smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
4078 if (IS_ERR(smmu->base))
4079 return PTR_ERR(smmu->base);
4081 if (arm_smmu_resource_size(smmu) > SZ_64K) {
4082 smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
4083 ARM_SMMU_REG_SZ);
4084 if (IS_ERR(smmu->page1))
4085 return PTR_ERR(smmu->page1);
4086 } else {
4087 smmu->page1 = smmu->base;
4088 }
4090 /* Interrupt lines */
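/*
 * A "combined" interrupt takes precedence if the firmware provides one;
 * otherwise the event queue, PRI queue and global error interrupts are
 * wired up individually. All of them are optional at this stage.
 */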
4092 irq = platform_get_irq_byname_optional(pdev, "combined");
4093 if (irq > 0)
4094 smmu->combined_irq = irq;
4095 else {
4096 irq = platform_get_irq_byname_optional(pdev, "eventq");
4097 if (irq > 0)
4098 smmu->evtq.q.irq = irq;
4100 irq = platform_get_irq_byname_optional(pdev, "priq");
4101 if (irq > 0)
4102 smmu->priq.q.irq = irq;
4104 irq = platform_get_irq_byname_optional(pdev, "gerror");
4105 if (irq > 0)
4106 smmu->gerr_irq = irq;
4107 }
4108 /* Probe the h/w */
4109 ret = arm_smmu_device_hw_probe(smmu);
4110 if (ret)
4111 return ret;
4113 /* Initialise in-memory data structures */
4114 ret = arm_smmu_init_structures(smmu);
4115 if (ret)
4116 return ret;
4118 /* Record our private device structure */
4119 platform_set_drvdata(pdev, smmu);
4121 /* Reset the device */
4122 ret = arm_smmu_device_reset(smmu, bypass);
4123 if (ret)
4124 return ret;
4126 /* And we're up. Go go go! */
4127 ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
4128 "smmu3.%pa", &ioaddr);
4129 if (ret)
4130 return ret;
4132 iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
4133 iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);
4135 ret = iommu_device_register(&smmu->iommu);
4136 if (ret) {
4137 dev_err(dev, "Failed to register iommu\n");
4138 return ret;
4139 }
4141 return arm_smmu_set_bus_ops(&arm_smmu_ops);
4142 }
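/*
 * Teardown mirrors probe in reverse: detach the bus ops, unregister from
 * the IOMMU core, remove the sysfs entry, then disable the SMMU itself.
 */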
4144 static int arm_smmu_device_remove(struct platform_device *pdev)
4145 {
4146 struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
4148 arm_smmu_set_bus_ops(NULL);
4149 iommu_device_unregister(&smmu->iommu);
4150 iommu_device_sysfs_remove(&smmu->iommu);
4151 arm_smmu_device_disable(smmu);
4153 return 0;
4154 }
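/*
 * Shutdown reuses the remove path, presumably so the SMMU is disabled and
 * DMA quiesced before a reboot or kexec.
 */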
4156 static void arm_smmu_device_shutdown(struct platform_device *pdev)
4157 {
4158 arm_smmu_device_remove(pdev);
4159 }
4161 static const struct of_device_id arm_smmu_of_match[] = {
4162 { .compatible = "arm,smmu-v3", },
4163 { },
4164 };
4165 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
4167 static struct platform_driver arm_smmu_driver = {
4168 .driver = {
4169 .name = "arm-smmu-v3",
4170 .of_match_table = arm_smmu_of_match,
4171 .suppress_bind_attrs = true,
4172 },
4173 .probe = arm_smmu_device_probe,
4174 .remove = arm_smmu_device_remove,
4175 .shutdown = arm_smmu_device_shutdown,
4176 };
4177 module_platform_driver(arm_smmu_driver);
4179 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
4180 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
4181 MODULE_ALIAS("platform:arm-smmu-v3");
4182 MODULE_LICENSE("GPL v2");