// SPDX-License-Identifier: GPL-2.0-only
/*
 * CPU-agnostic ARM page table allocator.
 *
 * Copyright (C) 2014 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 */
10 #define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt
12 #include <linux/atomic.h>
13 #include <linux/bitops.h>
14 #include <linux/io-pgtable.h>
15 #include <linux/kernel.h>
16 #include <linux/sizes.h>
17 #include <linux/slab.h>
18 #include <linux/types.h>
19 #include <linux/dma-mapping.h>
21 #include <asm/barrier.h>
23 #include "io-pgtable-arm.h"
#define ARM_LPAE_MAX_ADDR_BITS		52
#define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
#define ARM_LPAE_MAX_LEVELS		4

/* Struct accessors */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_lpae_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * Calculate the right shift amount to get to the portion describing level l
 * in a virtual address mapped by the pagetable in d.
 */
#define ARM_LPAE_LVL_SHIFT(l,d)						\
	(((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level) +		\
	ilog2(sizeof(arm_lpae_iopte)))

/* Size in bytes of one regular table, and of the top-level table (pgd). */
#define ARM_LPAE_GRANULE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->bits_per_level)
#define ARM_LPAE_PGD_SIZE(d)						\
	(sizeof(arm_lpae_iopte) << (d)->pgd_bits)

/*
 * Calculate the index at level l used to map virtual address a using the
 * pagetable in d. The start level may be wider than the other levels, so
 * ARM_LPAE_PGD_IDX() yields the number of extra index bits it carries.
 */
#define ARM_LPAE_PGD_IDX(l,d)						\
	((l) == (d)->start_level ? (d)->pgd_bits - (d)->bits_per_level : 0)

#define ARM_LPAE_LVL_IDX(a,l,d)						\
	(((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &			\
	 ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))

/* Calculate the block/page mapping size at level l for pagetable in d. */
#define ARM_LPAE_BLOCK_SIZE(l,d)	(1ULL << ARM_LPAE_LVL_SHIFT(l,d))

/* Page table bits */
#define ARM_LPAE_PTE_TYPE_SHIFT		0
#define ARM_LPAE_PTE_TYPE_MASK		0x3

/*
 * TABLE and PAGE share one encoding; which of the two an entry is depends
 * on the level it is found at (see iopte_leaf()).
 */
#define ARM_LPAE_PTE_TYPE_BLOCK		1
#define ARM_LPAE_PTE_TYPE_TABLE		3
#define ARM_LPAE_PTE_TYPE_PAGE		3

#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)

#define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS		(((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS		(((arm_lpae_iopte)2) << 8)
#define ARM_LPAE_PTE_SH_IS		(((arm_lpae_iopte)3) << 8)
#define ARM_LPAE_PTE_NS			(((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID		(((arm_lpae_iopte)1) << 0)

#define ARM_LPAE_PTE_ATTR_LO_MASK	(((arm_lpae_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define ARM_LPAE_PTE_ATTR_HI_MASK	(((arm_lpae_iopte)6) << 52)
#define ARM_LPAE_PTE_ATTR_MASK		(ARM_LPAE_PTE_ATTR_LO_MASK |	\
					 ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC		(((arm_lpae_iopte)1) << 55)

/* Stage-1 PTE */
#define ARM_LPAE_PTE_AP_UNPRIV		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_AP_RDONLY		(((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_ATTRINDX_SHIFT	2
#define ARM_LPAE_PTE_nG			(((arm_lpae_iopte)1) << 11)

/* Stage-2 PTE */
#define ARM_LPAE_PTE_HAP_FAULT		(((arm_lpae_iopte)0) << 6)
#define ARM_LPAE_PTE_HAP_READ		(((arm_lpae_iopte)1) << 6)
#define ARM_LPAE_PTE_HAP_WRITE		(((arm_lpae_iopte)2) << 6)
#define ARM_LPAE_PTE_MEMATTR_OIWB	(((arm_lpae_iopte)0xf) << 2)
#define ARM_LPAE_PTE_MEMATTR_NC		(((arm_lpae_iopte)0x5) << 2)
#define ARM_LPAE_PTE_MEMATTR_DEV	(((arm_lpae_iopte)0x1) << 2)

/* Register bits */
#define ARM_LPAE_VTCR_SL0_MASK		0x3

#define ARM_LPAE_TCR_T0SZ_SHIFT		0

#define ARM_LPAE_VTCR_PS_SHIFT		16
#define ARM_LPAE_VTCR_PS_MASK		0x7

/* Each MAIR attribute occupies one byte, selected by its index n. */
#define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK		0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE	0x04
#define ARM_LPAE_MAIR_ATTR_NC		0x44
#define ARM_LPAE_MAIR_ATTR_INC_OWBRWA	0xf4
#define ARM_LPAE_MAIR_ATTR_WBRWA	0xff
#define ARM_LPAE_MAIR_ATTR_IDX_NC	0
#define ARM_LPAE_MAIR_ATTR_IDX_CACHE	1
#define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
#define ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE	3

#define ARM_MALI_LPAE_TTBR_ADRMODE_TABLE (3u << 0)
#define ARM_MALI_LPAE_TTBR_READ_INNER	BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER	BIT(4)

#define ARM_MALI_LPAE_MEMATTR_IMP_DEF	0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL

/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))

#define iopte_type(pte)					\
	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)

#define iopte_prot(pte)	((pte) & ARM_LPAE_PTE_ATTR_MASK)
138 struct arm_lpae_io_pgtable
{
139 struct io_pgtable iop
;
148 typedef u64 arm_lpae_iopte
;
150 static inline bool iopte_leaf(arm_lpae_iopte pte
, int lvl
,
151 enum io_pgtable_fmt fmt
)
153 if (lvl
== (ARM_LPAE_MAX_LEVELS
- 1) && fmt
!= ARM_MALI_LPAE
)
154 return iopte_type(pte
) == ARM_LPAE_PTE_TYPE_PAGE
;
156 return iopte_type(pte
) == ARM_LPAE_PTE_TYPE_BLOCK
;
159 static arm_lpae_iopte
paddr_to_iopte(phys_addr_t paddr
,
160 struct arm_lpae_io_pgtable
*data
)
162 arm_lpae_iopte pte
= paddr
;
164 /* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
165 return (pte
| (pte
>> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK
;
168 static phys_addr_t
iopte_to_paddr(arm_lpae_iopte pte
,
169 struct arm_lpae_io_pgtable
*data
)
171 u64 paddr
= pte
& ARM_LPAE_PTE_ADDR_MASK
;
173 if (ARM_LPAE_GRANULE(data
) < SZ_64K
)
176 /* Rotate the packed high-order bits back to the top */
177 return (paddr
| (paddr
<< (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK
<< 4);
180 static bool selftest_running
= false;
182 static dma_addr_t
__arm_lpae_dma_addr(void *pages
)
184 return (dma_addr_t
)virt_to_phys(pages
);
187 static void *__arm_lpae_alloc_pages(size_t size
, gfp_t gfp
,
188 struct io_pgtable_cfg
*cfg
)
190 struct device
*dev
= cfg
->iommu_dev
;
191 int order
= get_order(size
);
196 VM_BUG_ON((gfp
& __GFP_HIGHMEM
));
197 p
= alloc_pages_node(dev
? dev_to_node(dev
) : NUMA_NO_NODE
,
198 gfp
| __GFP_ZERO
, order
);
202 pages
= page_address(p
);
203 if (!cfg
->coherent_walk
) {
204 dma
= dma_map_single(dev
, pages
, size
, DMA_TO_DEVICE
);
205 if (dma_mapping_error(dev
, dma
))
208 * We depend on the IOMMU being able to work with any physical
209 * address directly, so if the DMA layer suggests otherwise by
210 * translating or truncating them, that bodes very badly...
212 if (dma
!= virt_to_phys(pages
))
219 dev_err(dev
, "Cannot accommodate DMA translation for IOMMU page tables\n");
220 dma_unmap_single(dev
, dma
, size
, DMA_TO_DEVICE
);
222 __free_pages(p
, order
);
226 static void __arm_lpae_free_pages(void *pages
, size_t size
,
227 struct io_pgtable_cfg
*cfg
)
229 if (!cfg
->coherent_walk
)
230 dma_unmap_single(cfg
->iommu_dev
, __arm_lpae_dma_addr(pages
),
231 size
, DMA_TO_DEVICE
);
232 free_pages((unsigned long)pages
, get_order(size
));
235 static void __arm_lpae_sync_pte(arm_lpae_iopte
*ptep
,
236 struct io_pgtable_cfg
*cfg
)
238 dma_sync_single_for_device(cfg
->iommu_dev
, __arm_lpae_dma_addr(ptep
),
239 sizeof(*ptep
), DMA_TO_DEVICE
);
242 static void __arm_lpae_set_pte(arm_lpae_iopte
*ptep
, arm_lpae_iopte pte
,
243 struct io_pgtable_cfg
*cfg
)
247 if (!cfg
->coherent_walk
)
248 __arm_lpae_sync_pte(ptep
, cfg
);
251 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable
*data
,
252 struct iommu_iotlb_gather
*gather
,
253 unsigned long iova
, size_t size
, int lvl
,
254 arm_lpae_iopte
*ptep
);
256 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable
*data
,
257 phys_addr_t paddr
, arm_lpae_iopte prot
,
258 int lvl
, arm_lpae_iopte
*ptep
)
260 arm_lpae_iopte pte
= prot
;
262 if (data
->iop
.fmt
!= ARM_MALI_LPAE
&& lvl
== ARM_LPAE_MAX_LEVELS
- 1)
263 pte
|= ARM_LPAE_PTE_TYPE_PAGE
;
265 pte
|= ARM_LPAE_PTE_TYPE_BLOCK
;
267 pte
|= paddr_to_iopte(paddr
, data
);
269 __arm_lpae_set_pte(ptep
, pte
, &data
->iop
.cfg
);
272 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable
*data
,
273 unsigned long iova
, phys_addr_t paddr
,
274 arm_lpae_iopte prot
, int lvl
,
275 arm_lpae_iopte
*ptep
)
277 arm_lpae_iopte pte
= *ptep
;
279 if (iopte_leaf(pte
, lvl
, data
->iop
.fmt
)) {
280 /* We require an unmap first */
281 WARN_ON(!selftest_running
);
283 } else if (iopte_type(pte
) == ARM_LPAE_PTE_TYPE_TABLE
) {
285 * We need to unmap and free the old table before
286 * overwriting it with a block entry.
288 arm_lpae_iopte
*tblp
;
289 size_t sz
= ARM_LPAE_BLOCK_SIZE(lvl
, data
);
291 tblp
= ptep
- ARM_LPAE_LVL_IDX(iova
, lvl
, data
);
292 if (__arm_lpae_unmap(data
, NULL
, iova
, sz
, lvl
, tblp
) != sz
) {
298 __arm_lpae_init_pte(data
, paddr
, prot
, lvl
, ptep
);
302 static arm_lpae_iopte
arm_lpae_install_table(arm_lpae_iopte
*table
,
303 arm_lpae_iopte
*ptep
,
305 struct io_pgtable_cfg
*cfg
)
307 arm_lpae_iopte old
, new;
309 new = __pa(table
) | ARM_LPAE_PTE_TYPE_TABLE
;
310 if (cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_NS
)
311 new |= ARM_LPAE_PTE_NSTABLE
;
314 * Ensure the table itself is visible before its PTE can be.
315 * Whilst we could get away with cmpxchg64_release below, this
316 * doesn't have any ordering semantics when !CONFIG_SMP.
320 old
= cmpxchg64_relaxed(ptep
, curr
, new);
322 if (cfg
->coherent_walk
|| (old
& ARM_LPAE_PTE_SW_SYNC
))
325 /* Even if it's not ours, there's no point waiting; just kick it */
326 __arm_lpae_sync_pte(ptep
, cfg
);
328 WRITE_ONCE(*ptep
, new | ARM_LPAE_PTE_SW_SYNC
);
333 static int __arm_lpae_map(struct arm_lpae_io_pgtable
*data
, unsigned long iova
,
334 phys_addr_t paddr
, size_t size
, arm_lpae_iopte prot
,
335 int lvl
, arm_lpae_iopte
*ptep
, gfp_t gfp
)
337 arm_lpae_iopte
*cptep
, pte
;
338 size_t block_size
= ARM_LPAE_BLOCK_SIZE(lvl
, data
);
339 size_t tblsz
= ARM_LPAE_GRANULE(data
);
340 struct io_pgtable_cfg
*cfg
= &data
->iop
.cfg
;
342 /* Find our entry at the current level */
343 ptep
+= ARM_LPAE_LVL_IDX(iova
, lvl
, data
);
345 /* If we can install a leaf entry at this level, then do so */
346 if (size
== block_size
)
347 return arm_lpae_init_pte(data
, iova
, paddr
, prot
, lvl
, ptep
);
349 /* We can't allocate tables at the final level */
350 if (WARN_ON(lvl
>= ARM_LPAE_MAX_LEVELS
- 1))
353 /* Grab a pointer to the next level */
354 pte
= READ_ONCE(*ptep
);
356 cptep
= __arm_lpae_alloc_pages(tblsz
, gfp
, cfg
);
360 pte
= arm_lpae_install_table(cptep
, ptep
, 0, cfg
);
362 __arm_lpae_free_pages(cptep
, tblsz
, cfg
);
363 } else if (!cfg
->coherent_walk
&& !(pte
& ARM_LPAE_PTE_SW_SYNC
)) {
364 __arm_lpae_sync_pte(ptep
, cfg
);
367 if (pte
&& !iopte_leaf(pte
, lvl
, data
->iop
.fmt
)) {
368 cptep
= iopte_deref(pte
, data
);
370 /* We require an unmap first */
371 WARN_ON(!selftest_running
);
376 return __arm_lpae_map(data
, iova
, paddr
, size
, prot
, lvl
+ 1, cptep
, gfp
);
379 static arm_lpae_iopte
arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable
*data
,
384 if (data
->iop
.fmt
== ARM_64_LPAE_S1
||
385 data
->iop
.fmt
== ARM_32_LPAE_S1
) {
386 pte
= ARM_LPAE_PTE_nG
;
387 if (!(prot
& IOMMU_WRITE
) && (prot
& IOMMU_READ
))
388 pte
|= ARM_LPAE_PTE_AP_RDONLY
;
389 if (!(prot
& IOMMU_PRIV
))
390 pte
|= ARM_LPAE_PTE_AP_UNPRIV
;
392 pte
= ARM_LPAE_PTE_HAP_FAULT
;
393 if (prot
& IOMMU_READ
)
394 pte
|= ARM_LPAE_PTE_HAP_READ
;
395 if (prot
& IOMMU_WRITE
)
396 pte
|= ARM_LPAE_PTE_HAP_WRITE
;
400 * Note that this logic is structured to accommodate Mali LPAE
401 * having stage-1-like attributes but stage-2-like permissions.
403 if (data
->iop
.fmt
== ARM_64_LPAE_S2
||
404 data
->iop
.fmt
== ARM_32_LPAE_S2
) {
405 if (prot
& IOMMU_MMIO
)
406 pte
|= ARM_LPAE_PTE_MEMATTR_DEV
;
407 else if (prot
& IOMMU_CACHE
)
408 pte
|= ARM_LPAE_PTE_MEMATTR_OIWB
;
410 pte
|= ARM_LPAE_PTE_MEMATTR_NC
;
412 if (prot
& IOMMU_MMIO
)
413 pte
|= (ARM_LPAE_MAIR_ATTR_IDX_DEV
414 << ARM_LPAE_PTE_ATTRINDX_SHIFT
);
415 else if (prot
& IOMMU_CACHE
)
416 pte
|= (ARM_LPAE_MAIR_ATTR_IDX_CACHE
417 << ARM_LPAE_PTE_ATTRINDX_SHIFT
);
421 * Also Mali has its own notions of shareability wherein its Inner
422 * domain covers the cores within the GPU, and its Outer domain is
423 * "outside the GPU" (i.e. either the Inner or System domain in CPU
424 * terms, depending on coherency).
426 if (prot
& IOMMU_CACHE
&& data
->iop
.fmt
!= ARM_MALI_LPAE
)
427 pte
|= ARM_LPAE_PTE_SH_IS
;
429 pte
|= ARM_LPAE_PTE_SH_OS
;
431 if (prot
& IOMMU_NOEXEC
)
432 pte
|= ARM_LPAE_PTE_XN
;
434 if (data
->iop
.cfg
.quirks
& IO_PGTABLE_QUIRK_ARM_NS
)
435 pte
|= ARM_LPAE_PTE_NS
;
437 if (data
->iop
.fmt
!= ARM_MALI_LPAE
)
438 pte
|= ARM_LPAE_PTE_AF
;
443 static int arm_lpae_map(struct io_pgtable_ops
*ops
, unsigned long iova
,
444 phys_addr_t paddr
, size_t size
, int iommu_prot
, gfp_t gfp
)
446 struct arm_lpae_io_pgtable
*data
= io_pgtable_ops_to_data(ops
);
447 struct io_pgtable_cfg
*cfg
= &data
->iop
.cfg
;
448 arm_lpae_iopte
*ptep
= data
->pgd
;
449 int ret
, lvl
= data
->start_level
;
451 long iaext
= (s64
)iova
>> cfg
->ias
;
453 if (WARN_ON(!size
|| (size
& cfg
->pgsize_bitmap
) != size
))
456 if (cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_TTBR1
)
458 if (WARN_ON(iaext
|| paddr
>> cfg
->oas
))
461 /* If no access, then nothing to do */
462 if (!(iommu_prot
& (IOMMU_READ
| IOMMU_WRITE
)))
465 prot
= arm_lpae_prot_to_pte(data
, iommu_prot
);
466 ret
= __arm_lpae_map(data
, iova
, paddr
, size
, prot
, lvl
, ptep
, gfp
);
468 * Synchronise all PTE updates for the new mapping before there's
469 * a chance for anything to kick off a table walk for the new iova.
476 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable
*data
, int lvl
,
477 arm_lpae_iopte
*ptep
)
479 arm_lpae_iopte
*start
, *end
;
480 unsigned long table_size
;
482 if (lvl
== data
->start_level
)
483 table_size
= ARM_LPAE_PGD_SIZE(data
);
485 table_size
= ARM_LPAE_GRANULE(data
);
489 /* Only leaf entries at the last level */
490 if (lvl
== ARM_LPAE_MAX_LEVELS
- 1)
493 end
= (void *)ptep
+ table_size
;
495 while (ptep
!= end
) {
496 arm_lpae_iopte pte
= *ptep
++;
498 if (!pte
|| iopte_leaf(pte
, lvl
, data
->iop
.fmt
))
501 __arm_lpae_free_pgtable(data
, lvl
+ 1, iopte_deref(pte
, data
));
504 __arm_lpae_free_pages(start
, table_size
, &data
->iop
.cfg
);
507 static void arm_lpae_free_pgtable(struct io_pgtable
*iop
)
509 struct arm_lpae_io_pgtable
*data
= io_pgtable_to_data(iop
);
511 __arm_lpae_free_pgtable(data
, data
->start_level
, data
->pgd
);
515 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable
*data
,
516 struct iommu_iotlb_gather
*gather
,
517 unsigned long iova
, size_t size
,
518 arm_lpae_iopte blk_pte
, int lvl
,
519 arm_lpae_iopte
*ptep
)
521 struct io_pgtable_cfg
*cfg
= &data
->iop
.cfg
;
522 arm_lpae_iopte pte
, *tablep
;
523 phys_addr_t blk_paddr
;
524 size_t tablesz
= ARM_LPAE_GRANULE(data
);
525 size_t split_sz
= ARM_LPAE_BLOCK_SIZE(lvl
, data
);
526 int i
, unmap_idx
= -1;
528 if (WARN_ON(lvl
== ARM_LPAE_MAX_LEVELS
))
531 tablep
= __arm_lpae_alloc_pages(tablesz
, GFP_ATOMIC
, cfg
);
533 return 0; /* Bytes unmapped */
535 if (size
== split_sz
)
536 unmap_idx
= ARM_LPAE_LVL_IDX(iova
, lvl
, data
);
538 blk_paddr
= iopte_to_paddr(blk_pte
, data
);
539 pte
= iopte_prot(blk_pte
);
541 for (i
= 0; i
< tablesz
/ sizeof(pte
); i
++, blk_paddr
+= split_sz
) {
546 __arm_lpae_init_pte(data
, blk_paddr
, pte
, lvl
, &tablep
[i
]);
549 pte
= arm_lpae_install_table(tablep
, ptep
, blk_pte
, cfg
);
550 if (pte
!= blk_pte
) {
551 __arm_lpae_free_pages(tablep
, tablesz
, cfg
);
553 * We may race against someone unmapping another part of this
554 * block, but anything else is invalid. We can't misinterpret
555 * a page entry here since we're never at the last level.
557 if (iopte_type(pte
) != ARM_LPAE_PTE_TYPE_TABLE
)
560 tablep
= iopte_deref(pte
, data
);
561 } else if (unmap_idx
>= 0) {
562 io_pgtable_tlb_add_page(&data
->iop
, gather
, iova
, size
);
566 return __arm_lpae_unmap(data
, gather
, iova
, size
, lvl
, tablep
);
569 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable
*data
,
570 struct iommu_iotlb_gather
*gather
,
571 unsigned long iova
, size_t size
, int lvl
,
572 arm_lpae_iopte
*ptep
)
575 struct io_pgtable
*iop
= &data
->iop
;
577 /* Something went horribly wrong and we ran out of page table */
578 if (WARN_ON(lvl
== ARM_LPAE_MAX_LEVELS
))
581 ptep
+= ARM_LPAE_LVL_IDX(iova
, lvl
, data
);
582 pte
= READ_ONCE(*ptep
);
586 /* If the size matches this level, we're in the right place */
587 if (size
== ARM_LPAE_BLOCK_SIZE(lvl
, data
)) {
588 __arm_lpae_set_pte(ptep
, 0, &iop
->cfg
);
590 if (!iopte_leaf(pte
, lvl
, iop
->fmt
)) {
591 /* Also flush any partial walks */
592 io_pgtable_tlb_flush_walk(iop
, iova
, size
,
593 ARM_LPAE_GRANULE(data
));
594 ptep
= iopte_deref(pte
, data
);
595 __arm_lpae_free_pgtable(data
, lvl
+ 1, ptep
);
596 } else if (iop
->cfg
.quirks
& IO_PGTABLE_QUIRK_NON_STRICT
) {
598 * Order the PTE update against queueing the IOVA, to
599 * guarantee that a flush callback from a different CPU
600 * has observed it before the TLBIALL can be issued.
604 io_pgtable_tlb_add_page(iop
, gather
, iova
, size
);
608 } else if (iopte_leaf(pte
, lvl
, iop
->fmt
)) {
610 * Insert a table at the next level to map the old region,
611 * minus the part we want to unmap
613 return arm_lpae_split_blk_unmap(data
, gather
, iova
, size
, pte
,
617 /* Keep on walkin' */
618 ptep
= iopte_deref(pte
, data
);
619 return __arm_lpae_unmap(data
, gather
, iova
, size
, lvl
+ 1, ptep
);
622 static size_t arm_lpae_unmap(struct io_pgtable_ops
*ops
, unsigned long iova
,
623 size_t size
, struct iommu_iotlb_gather
*gather
)
625 struct arm_lpae_io_pgtable
*data
= io_pgtable_ops_to_data(ops
);
626 struct io_pgtable_cfg
*cfg
= &data
->iop
.cfg
;
627 arm_lpae_iopte
*ptep
= data
->pgd
;
628 long iaext
= (s64
)iova
>> cfg
->ias
;
630 if (WARN_ON(!size
|| (size
& cfg
->pgsize_bitmap
) != size
))
633 if (cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_TTBR1
)
638 return __arm_lpae_unmap(data
, gather
, iova
, size
, data
->start_level
, ptep
);
641 static phys_addr_t
arm_lpae_iova_to_phys(struct io_pgtable_ops
*ops
,
644 struct arm_lpae_io_pgtable
*data
= io_pgtable_ops_to_data(ops
);
645 arm_lpae_iopte pte
, *ptep
= data
->pgd
;
646 int lvl
= data
->start_level
;
649 /* Valid IOPTE pointer? */
653 /* Grab the IOPTE we're interested in */
654 ptep
+= ARM_LPAE_LVL_IDX(iova
, lvl
, data
);
655 pte
= READ_ONCE(*ptep
);
662 if (iopte_leaf(pte
, lvl
, data
->iop
.fmt
))
663 goto found_translation
;
665 /* Take it to the next level */
666 ptep
= iopte_deref(pte
, data
);
667 } while (++lvl
< ARM_LPAE_MAX_LEVELS
);
669 /* Ran out of page tables to walk */
673 iova
&= (ARM_LPAE_BLOCK_SIZE(lvl
, data
) - 1);
674 return iopte_to_paddr(pte
, data
) | iova
;
677 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg
*cfg
)
679 unsigned long granule
, page_sizes
;
680 unsigned int max_addr_bits
= 48;
683 * We need to restrict the supported page sizes to match the
684 * translation regime for a particular granule. Aim to match
685 * the CPU page size if possible, otherwise prefer smaller sizes.
686 * While we're at it, restrict the block sizes to match the
689 if (cfg
->pgsize_bitmap
& PAGE_SIZE
)
691 else if (cfg
->pgsize_bitmap
& ~PAGE_MASK
)
692 granule
= 1UL << __fls(cfg
->pgsize_bitmap
& ~PAGE_MASK
);
693 else if (cfg
->pgsize_bitmap
& PAGE_MASK
)
694 granule
= 1UL << __ffs(cfg
->pgsize_bitmap
& PAGE_MASK
);
700 page_sizes
= (SZ_4K
| SZ_2M
| SZ_1G
);
703 page_sizes
= (SZ_16K
| SZ_32M
);
707 page_sizes
= (SZ_64K
| SZ_512M
);
709 page_sizes
|= 1ULL << 42; /* 4TB */
715 cfg
->pgsize_bitmap
&= page_sizes
;
716 cfg
->ias
= min(cfg
->ias
, max_addr_bits
);
717 cfg
->oas
= min(cfg
->oas
, max_addr_bits
);
720 static struct arm_lpae_io_pgtable
*
721 arm_lpae_alloc_pgtable(struct io_pgtable_cfg
*cfg
)
723 struct arm_lpae_io_pgtable
*data
;
724 int levels
, va_bits
, pg_shift
;
726 arm_lpae_restrict_pgsizes(cfg
);
728 if (!(cfg
->pgsize_bitmap
& (SZ_4K
| SZ_16K
| SZ_64K
)))
731 if (cfg
->ias
> ARM_LPAE_MAX_ADDR_BITS
)
734 if (cfg
->oas
> ARM_LPAE_MAX_ADDR_BITS
)
737 data
= kmalloc(sizeof(*data
), GFP_KERNEL
);
741 pg_shift
= __ffs(cfg
->pgsize_bitmap
);
742 data
->bits_per_level
= pg_shift
- ilog2(sizeof(arm_lpae_iopte
));
744 va_bits
= cfg
->ias
- pg_shift
;
745 levels
= DIV_ROUND_UP(va_bits
, data
->bits_per_level
);
746 data
->start_level
= ARM_LPAE_MAX_LEVELS
- levels
;
748 /* Calculate the actual size of our pgd (without concatenation) */
749 data
->pgd_bits
= va_bits
- (data
->bits_per_level
* (levels
- 1));
751 data
->iop
.ops
= (struct io_pgtable_ops
) {
753 .unmap
= arm_lpae_unmap
,
754 .iova_to_phys
= arm_lpae_iova_to_phys
,
760 static struct io_pgtable
*
761 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg
*cfg
, void *cookie
)
764 struct arm_lpae_io_pgtable
*data
;
765 typeof(&cfg
->arm_lpae_s1_cfg
.tcr
) tcr
= &cfg
->arm_lpae_s1_cfg
.tcr
;
768 if (cfg
->quirks
& ~(IO_PGTABLE_QUIRK_ARM_NS
|
769 IO_PGTABLE_QUIRK_NON_STRICT
|
770 IO_PGTABLE_QUIRK_ARM_TTBR1
|
771 IO_PGTABLE_QUIRK_ARM_OUTER_WBWA
))
774 data
= arm_lpae_alloc_pgtable(cfg
);
779 if (cfg
->coherent_walk
) {
780 tcr
->sh
= ARM_LPAE_TCR_SH_IS
;
781 tcr
->irgn
= ARM_LPAE_TCR_RGN_WBWA
;
782 tcr
->orgn
= ARM_LPAE_TCR_RGN_WBWA
;
783 if (cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_OUTER_WBWA
)
786 tcr
->sh
= ARM_LPAE_TCR_SH_OS
;
787 tcr
->irgn
= ARM_LPAE_TCR_RGN_NC
;
788 if (!(cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_OUTER_WBWA
))
789 tcr
->orgn
= ARM_LPAE_TCR_RGN_NC
;
791 tcr
->orgn
= ARM_LPAE_TCR_RGN_WBWA
;
794 tg1
= cfg
->quirks
& IO_PGTABLE_QUIRK_ARM_TTBR1
;
795 switch (ARM_LPAE_GRANULE(data
)) {
797 tcr
->tg
= tg1
? ARM_LPAE_TCR_TG1_4K
: ARM_LPAE_TCR_TG0_4K
;
800 tcr
->tg
= tg1
? ARM_LPAE_TCR_TG1_16K
: ARM_LPAE_TCR_TG0_16K
;
803 tcr
->tg
= tg1
? ARM_LPAE_TCR_TG1_64K
: ARM_LPAE_TCR_TG0_64K
;
809 tcr
->ips
= ARM_LPAE_TCR_PS_32_BIT
;
812 tcr
->ips
= ARM_LPAE_TCR_PS_36_BIT
;
815 tcr
->ips
= ARM_LPAE_TCR_PS_40_BIT
;
818 tcr
->ips
= ARM_LPAE_TCR_PS_42_BIT
;
821 tcr
->ips
= ARM_LPAE_TCR_PS_44_BIT
;
824 tcr
->ips
= ARM_LPAE_TCR_PS_48_BIT
;
827 tcr
->ips
= ARM_LPAE_TCR_PS_52_BIT
;
833 tcr
->tsz
= 64ULL - cfg
->ias
;
836 reg
= (ARM_LPAE_MAIR_ATTR_NC
837 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC
)) |
838 (ARM_LPAE_MAIR_ATTR_WBRWA
839 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE
)) |
840 (ARM_LPAE_MAIR_ATTR_DEVICE
841 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV
)) |
842 (ARM_LPAE_MAIR_ATTR_INC_OWBRWA
843 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_INC_OCACHE
));
845 cfg
->arm_lpae_s1_cfg
.mair
= reg
;
847 /* Looking good; allocate a pgd */
848 data
->pgd
= __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data
),
853 /* Ensure the empty pgd is visible before any actual TTBR write */
857 cfg
->arm_lpae_s1_cfg
.ttbr
= virt_to_phys(data
->pgd
);
865 static struct io_pgtable
*
866 arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg
*cfg
, void *cookie
)
869 struct arm_lpae_io_pgtable
*data
;
870 typeof(&cfg
->arm_lpae_s2_cfg
.vtcr
) vtcr
= &cfg
->arm_lpae_s2_cfg
.vtcr
;
872 /* The NS quirk doesn't apply at stage 2 */
873 if (cfg
->quirks
& ~(IO_PGTABLE_QUIRK_NON_STRICT
))
876 data
= arm_lpae_alloc_pgtable(cfg
);
881 * Concatenate PGDs at level 1 if possible in order to reduce
882 * the depth of the stage-2 walk.
884 if (data
->start_level
== 0) {
885 unsigned long pgd_pages
;
887 pgd_pages
= ARM_LPAE_PGD_SIZE(data
) / sizeof(arm_lpae_iopte
);
888 if (pgd_pages
<= ARM_LPAE_S2_MAX_CONCAT_PAGES
) {
889 data
->pgd_bits
+= data
->bits_per_level
;
895 if (cfg
->coherent_walk
) {
896 vtcr
->sh
= ARM_LPAE_TCR_SH_IS
;
897 vtcr
->irgn
= ARM_LPAE_TCR_RGN_WBWA
;
898 vtcr
->orgn
= ARM_LPAE_TCR_RGN_WBWA
;
900 vtcr
->sh
= ARM_LPAE_TCR_SH_OS
;
901 vtcr
->irgn
= ARM_LPAE_TCR_RGN_NC
;
902 vtcr
->orgn
= ARM_LPAE_TCR_RGN_NC
;
905 sl
= data
->start_level
;
907 switch (ARM_LPAE_GRANULE(data
)) {
909 vtcr
->tg
= ARM_LPAE_TCR_TG0_4K
;
910 sl
++; /* SL0 format is different for 4K granule size */
913 vtcr
->tg
= ARM_LPAE_TCR_TG0_16K
;
916 vtcr
->tg
= ARM_LPAE_TCR_TG0_64K
;
922 vtcr
->ps
= ARM_LPAE_TCR_PS_32_BIT
;
925 vtcr
->ps
= ARM_LPAE_TCR_PS_36_BIT
;
928 vtcr
->ps
= ARM_LPAE_TCR_PS_40_BIT
;
931 vtcr
->ps
= ARM_LPAE_TCR_PS_42_BIT
;
934 vtcr
->ps
= ARM_LPAE_TCR_PS_44_BIT
;
937 vtcr
->ps
= ARM_LPAE_TCR_PS_48_BIT
;
940 vtcr
->ps
= ARM_LPAE_TCR_PS_52_BIT
;
946 vtcr
->tsz
= 64ULL - cfg
->ias
;
947 vtcr
->sl
= ~sl
& ARM_LPAE_VTCR_SL0_MASK
;
949 /* Allocate pgd pages */
950 data
->pgd
= __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data
),
955 /* Ensure the empty pgd is visible before any actual TTBR write */
959 cfg
->arm_lpae_s2_cfg
.vttbr
= virt_to_phys(data
->pgd
);
967 static struct io_pgtable
*
968 arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg
*cfg
, void *cookie
)
970 if (cfg
->ias
> 32 || cfg
->oas
> 40)
973 cfg
->pgsize_bitmap
&= (SZ_4K
| SZ_2M
| SZ_1G
);
974 return arm_64_lpae_alloc_pgtable_s1(cfg
, cookie
);
977 static struct io_pgtable
*
978 arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg
*cfg
, void *cookie
)
980 if (cfg
->ias
> 40 || cfg
->oas
> 40)
983 cfg
->pgsize_bitmap
&= (SZ_4K
| SZ_2M
| SZ_1G
);
984 return arm_64_lpae_alloc_pgtable_s2(cfg
, cookie
);
987 static struct io_pgtable
*
988 arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg
*cfg
, void *cookie
)
990 struct arm_lpae_io_pgtable
*data
;
992 /* No quirks for Mali (hopefully) */
996 if (cfg
->ias
> 48 || cfg
->oas
> 40)
999 cfg
->pgsize_bitmap
&= (SZ_4K
| SZ_2M
| SZ_1G
);
1001 data
= arm_lpae_alloc_pgtable(cfg
);
1005 /* Mali seems to need a full 4-level table regardless of IAS */
1006 if (data
->start_level
> 0) {
1007 data
->start_level
= 0;
1011 * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
1012 * best we can do is mimic the out-of-tree driver and hope that the
1013 * "implementation-defined caching policy" is good enough. Similarly,
1014 * we'll use it for the sake of a valid attribute for our 'device'
1015 * index, although callers should never request that in practice.
1017 cfg
->arm_mali_lpae_cfg
.memattr
=
1018 (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1019 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC
)) |
1020 (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
1021 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE
)) |
1022 (ARM_MALI_LPAE_MEMATTR_IMP_DEF
1023 << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV
));
1025 data
->pgd
= __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data
), GFP_KERNEL
,
1030 /* Ensure the empty pgd is visible before TRANSTAB can be written */
1033 cfg
->arm_mali_lpae_cfg
.transtab
= virt_to_phys(data
->pgd
) |
1034 ARM_MALI_LPAE_TTBR_READ_INNER
|
1035 ARM_MALI_LPAE_TTBR_ADRMODE_TABLE
;
1036 if (cfg
->coherent_walk
)
1037 cfg
->arm_mali_lpae_cfg
.transtab
|= ARM_MALI_LPAE_TTBR_SHARE_OUTER
;
1046 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns
= {
1047 .alloc
= arm_64_lpae_alloc_pgtable_s1
,
1048 .free
= arm_lpae_free_pgtable
,
1051 struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns
= {
1052 .alloc
= arm_64_lpae_alloc_pgtable_s2
,
1053 .free
= arm_lpae_free_pgtable
,
1056 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns
= {
1057 .alloc
= arm_32_lpae_alloc_pgtable_s1
,
1058 .free
= arm_lpae_free_pgtable
,
1061 struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns
= {
1062 .alloc
= arm_32_lpae_alloc_pgtable_s2
,
1063 .free
= arm_lpae_free_pgtable
,
1066 struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns
= {
1067 .alloc
= arm_mali_lpae_alloc_pgtable
,
1068 .free
= arm_lpae_free_pgtable
,
1071 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
1073 static struct io_pgtable_cfg
*cfg_cookie __initdata
;
1075 static void __init
dummy_tlb_flush_all(void *cookie
)
1077 WARN_ON(cookie
!= cfg_cookie
);
1080 static void __init
dummy_tlb_flush(unsigned long iova
, size_t size
,
1081 size_t granule
, void *cookie
)
1083 WARN_ON(cookie
!= cfg_cookie
);
1084 WARN_ON(!(size
& cfg_cookie
->pgsize_bitmap
));
1087 static void __init
dummy_tlb_add_page(struct iommu_iotlb_gather
*gather
,
1088 unsigned long iova
, size_t granule
,
1091 dummy_tlb_flush(iova
, granule
, granule
, cookie
);
1094 static const struct iommu_flush_ops dummy_tlb_ops __initconst
= {
1095 .tlb_flush_all
= dummy_tlb_flush_all
,
1096 .tlb_flush_walk
= dummy_tlb_flush
,
1097 .tlb_add_page
= dummy_tlb_add_page
,
1100 static void __init
arm_lpae_dump_ops(struct io_pgtable_ops
*ops
)
1102 struct arm_lpae_io_pgtable
*data
= io_pgtable_ops_to_data(ops
);
1103 struct io_pgtable_cfg
*cfg
= &data
->iop
.cfg
;
1105 pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n",
1106 cfg
->pgsize_bitmap
, cfg
->ias
);
1107 pr_err("data: %d levels, 0x%zx pgd_size, %u pg_shift, %u bits_per_level, pgd @ %p\n",
1108 ARM_LPAE_MAX_LEVELS
- data
->start_level
, ARM_LPAE_PGD_SIZE(data
),
1109 ilog2(ARM_LPAE_GRANULE(data
)), data
->bits_per_level
, data
->pgd
);
1112 #define __FAIL(ops, i) ({ \
1113 WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \
1114 arm_lpae_dump_ops(ops); \
1115 selftest_running = false; \
1119 static int __init
arm_lpae_run_tests(struct io_pgtable_cfg
*cfg
)
1121 static const enum io_pgtable_fmt fmts
[] __initconst
= {
1129 struct io_pgtable_ops
*ops
;
1131 selftest_running
= true;
1133 for (i
= 0; i
< ARRAY_SIZE(fmts
); ++i
) {
1135 ops
= alloc_io_pgtable_ops(fmts
[i
], cfg
, cfg
);
1137 pr_err("selftest: failed to allocate io pgtable ops\n");
1142 * Initial sanity checks.
1143 * Empty page tables shouldn't provide any translations.
1145 if (ops
->iova_to_phys(ops
, 42))
1146 return __FAIL(ops
, i
);
1148 if (ops
->iova_to_phys(ops
, SZ_1G
+ 42))
1149 return __FAIL(ops
, i
);
1151 if (ops
->iova_to_phys(ops
, SZ_2G
+ 42))
1152 return __FAIL(ops
, i
);
1155 * Distinct mappings of different granule sizes.
1158 for_each_set_bit(j
, &cfg
->pgsize_bitmap
, BITS_PER_LONG
) {
1161 if (ops
->map(ops
, iova
, iova
, size
, IOMMU_READ
|
1164 IOMMU_CACHE
, GFP_KERNEL
))
1165 return __FAIL(ops
, i
);
1167 /* Overlapping mappings */
1168 if (!ops
->map(ops
, iova
, iova
+ size
, size
,
1169 IOMMU_READ
| IOMMU_NOEXEC
, GFP_KERNEL
))
1170 return __FAIL(ops
, i
);
1172 if (ops
->iova_to_phys(ops
, iova
+ 42) != (iova
+ 42))
1173 return __FAIL(ops
, i
);
1179 size
= 1UL << __ffs(cfg
->pgsize_bitmap
);
1180 if (ops
->unmap(ops
, SZ_1G
+ size
, size
, NULL
) != size
)
1181 return __FAIL(ops
, i
);
1183 /* Remap of partial unmap */
1184 if (ops
->map(ops
, SZ_1G
+ size
, size
, size
, IOMMU_READ
, GFP_KERNEL
))
1185 return __FAIL(ops
, i
);
1187 if (ops
->iova_to_phys(ops
, SZ_1G
+ size
+ 42) != (size
+ 42))
1188 return __FAIL(ops
, i
);
1192 for_each_set_bit(j
, &cfg
->pgsize_bitmap
, BITS_PER_LONG
) {
1195 if (ops
->unmap(ops
, iova
, size
, NULL
) != size
)
1196 return __FAIL(ops
, i
);
1198 if (ops
->iova_to_phys(ops
, iova
+ 42))
1199 return __FAIL(ops
, i
);
1201 /* Remap full block */
1202 if (ops
->map(ops
, iova
, iova
, size
, IOMMU_WRITE
, GFP_KERNEL
))
1203 return __FAIL(ops
, i
);
1205 if (ops
->iova_to_phys(ops
, iova
+ 42) != (iova
+ 42))
1206 return __FAIL(ops
, i
);
1211 free_io_pgtable_ops(ops
);
1214 selftest_running
= false;
1218 static int __init
arm_lpae_do_selftests(void)
1220 static const unsigned long pgsize
[] __initconst
= {
1221 SZ_4K
| SZ_2M
| SZ_1G
,
1226 static const unsigned int ias
[] __initconst
= {
1227 32, 36, 40, 42, 44, 48,
1230 int i
, j
, pass
= 0, fail
= 0;
1231 struct io_pgtable_cfg cfg
= {
1232 .tlb
= &dummy_tlb_ops
,
1234 .coherent_walk
= true,
1237 for (i
= 0; i
< ARRAY_SIZE(pgsize
); ++i
) {
1238 for (j
= 0; j
< ARRAY_SIZE(ias
); ++j
) {
1239 cfg
.pgsize_bitmap
= pgsize
[i
];
1241 pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
1243 if (arm_lpae_run_tests(&cfg
))
1250 pr_info("selftest: completed with %d PASS %d FAIL\n", pass
, fail
);
1251 return fail
? -EFAULT
: 0;
/* Run the self-tests at subsys initcall time. */
subsys_initcall(arm_lpae_do_selftests);