// SPDX-License-Identifier: GPL-2.0-only
/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>

#include <asm/cputype.h>
#include <asm/cachetype.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
#include <asm/highmem.h>
#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/procinfo.h>
#include <asm/pgalloc.h>
#include <asm/kasan_def.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/mach/pci.h>
#include <asm/fixmap.h>
extern unsigned long __atags_pointer;

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

pmdval_t user_pmd_table = _PAGE_USER_TABLE;
#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	},
};
#ifdef CONFIG_CPU_CP15
static unsigned long initial_pmd_value __initdata = 0;

/*
 * Initialise the cache_policy variable with the initial state specified
 * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
 * the C code sets the page tables up with the same policy as the head
 * assembly code, which avoids an illegal state where the TLBs can get
 * confused.  See comments in early_cachepolicy() for more information.
 */
void __init init_default_cache_policy(unsigned long pmd)
{
	int i;

	initial_pmd_value = pmd;

	pmd &= PMD_SECT_CACHE_MASK;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
		if (cache_policies[i].pmd == pmd) {
			cachepolicy = i;
			break;
		}

	if (i == ARRAY_SIZE(cache_policies))
		pr_err("ERROR: could not find cache policy\n");
}
/*
 * These are useful for identifying cache coherency problems by allowing
 * the cache or the cache and writebuffer to be turned off.  (Note: the
 * write buffer should not be on and the cache off).
 */
static int __init early_cachepolicy(char *p)
{
	int i, selected = -1;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			selected = i;
			break;
		}
	}

	if (selected == -1)
		pr_err("ERROR: unknown or unsupported cache policy\n");

	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We can not
	 * change these attributes once the initial assembly has setup the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
		pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
			cache_policies[cachepolicy].policy);
		return 0;
	}

	if (selected != cachepolicy) {
		unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);

		cachepolicy = selected;
		flush_cache_all();
		set_cr(cr);
	}
	return 0;
}
early_param("cachepolicy", early_cachepolicy);
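
/*
 * Example (illustrative, not from this file): booting with
 * "cachepolicy=writealloc" on the kernel command line makes
 * early_cachepolicy() select the write-allocate entry of
 * cache_policies[] above.
 */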
static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	pr_warn("nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	pr_warn("nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);
#ifndef CONFIG_ARM_LPAE
static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
#endif
#else /* ifdef CONFIG_CPU_CP15 */

static int __init early_cachepolicy(char *p)
{
	pr_warn("cachepolicy kernel parameter not supported without cp15\n");
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init noalign_setup(char *__unused)
{
	pr_warn("noalign kernel parameter not supported without cp15\n");
	return 1;
}
__setup("noalign", noalign_setup);

#endif /* ifdef CONFIG_CPU_CP15 / else */
#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] __ro_after_init = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cache */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	  /* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	[MT_MINICLEAN] = {
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain		= DOMAIN_KERNEL,
	},
#endif
	[MT_LOW_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_VECTORS,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_USER | L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_VECTORS,
	},
	[MT_MEMORY_RWX] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RO] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN | L_PTE_RDONLY,
		.prot_l1	= PMD_TYPE_TABLE,
#ifdef CONFIG_ARM_LPAE
		.prot_sect	= PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2,
#else
		.prot_sect	= PMD_TYPE_SECT,
#endif
		.domain		= DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect	= PMD_TYPE_SECT,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_NONCACHED] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_MT_BUFFERABLE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_DTCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_ITCM] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_SO] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_MT_UNCACHED | L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				  PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain		= DOMAIN_KERNEL,
	},
	[MT_MEMORY_DMA_READY] = {
		.prot_pte	= L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				  L_PTE_XN,
		.prot_l1	= PMD_TYPE_TABLE,
		.domain		= DOMAIN_KERNEL,
	},
};
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);
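
/*
 * Illustrative use of get_mem_type() (a sketch, not code from this file):
 *
 *	const struct mem_type *mt = get_mem_type(MT_DEVICE);
 *	pgprot_t prot = __pgprot(0);
 *
 *	if (mt)
 *		prot = __pgprot(mt->prot_pte);
 *
 * which is essentially how the ioremap paths turn an MT_* index into page
 * protections.
 */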
static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr);

static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]
	__aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata;

static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr)
{
	return &bm_pte[pte_index(addr)];
}

static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr)
{
	return pte_offset_kernel(dir, addr);
}

static inline pmd_t * __init fixmap_pmd(unsigned long addr)
{
	return pmd_off_k(addr);
}

void __init early_fixmap_init(void)
{
	pmd_t *pmd;

	/*
	 * The early fixmap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
		     != FIXADDR_TOP >> PMD_SHIFT);

	pmd = fixmap_pmd(FIXADDR_TOP);
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	pte_offset_fixmap = pte_offset_early_fixmap;
}
/*
 * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
 * As a result, this can only be called with preemption disabled, as under
 * stop_machine().
 */
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long vaddr = __fix_to_virt(idx);
	pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr);

	/* Make sure fixmap region does not exceed available allocation. */
	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
	BUG_ON(idx >= __end_of_fixed_addresses);

	/* We support only device mappings before pgprot_kernel is set. */
	if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
		    pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0))
		return;

	if (pgprot_val(prot))
		set_pte_at(NULL, vaddr, pte,
			   pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(NULL, vaddr, pte);
	local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
}
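
/*
 * Illustrative caller pattern (a sketch, not from this file): the generic
 * fixmap helpers funnel into __set_fixmap(), e.g. set_fixmap(idx, phys)
 * installs a mapping and clear_fixmap(idx) removes it again by passing a
 * zero protection value, which takes the pte_clear() path above.
 */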
static pgprot_t protection_map[16] __ro_after_init = {
	[VM_NONE]					= __PAGE_NONE,
	[VM_READ]					= __PAGE_READONLY,
	[VM_WRITE]					= __PAGE_COPY,
	[VM_WRITE | VM_READ]				= __PAGE_COPY,
	[VM_EXEC]					= __PAGE_READONLY_EXEC,
	[VM_EXEC | VM_READ]				= __PAGE_READONLY_EXEC,
	[VM_EXEC | VM_WRITE]				= __PAGE_COPY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ]			= __PAGE_COPY_EXEC,
	[VM_SHARED]					= __PAGE_NONE,
	[VM_SHARED | VM_READ]				= __PAGE_READONLY,
	[VM_SHARED | VM_WRITE]				= __PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ]		= __PAGE_SHARED,
	[VM_SHARED | VM_EXEC]				= __PAGE_READONLY_EXEC,
	[VM_SHARED | VM_EXEC | VM_READ]			= __PAGE_READONLY_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE]		= __PAGE_SHARED_EXEC,
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= __PAGE_SHARED_EXEC
};
DECLARE_VM_GET_PAGE_PROT
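
/*
 * protection_map[] is indexed by the low VM_READ/VM_WRITE/VM_EXEC/VM_SHARED
 * bits of vm_flags; for example a MAP_SHARED PROT_READ|PROT_WRITE mapping
 * (VM_SHARED | VM_WRITE | VM_READ) resolves to __PAGE_SHARED.
 * DECLARE_VM_GET_PAGE_PROT emits the vm_get_page_prot() helper that
 * performs this lookup.
 */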
/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}

	if (is_smp()) {
		if (cachepolicy != CPOLICY_WRITEALLOC) {
			pr_warn("Forcing write-allocate cache policy for SMP\n");
			cachepolicy = CPOLICY_WRITEALLOC;
		}
		if (!(initial_pmd_value & PMD_SECT_S)) {
			pr_warn("Forcing shared mappings for SMP\n");
			initial_pmd_value |= PMD_SECT_S;
		}
	}
	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale_family()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}
	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;

			/* Also setup NX memory mapping */
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
			mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}
	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

#ifndef CONFIG_ARM_LPAE
	/*
	 * We don't use domains on ARMv6 (since this causes problems with
	 * v6/v7 kernels), so we must use a separate memory type for user
	 * r/o, kernel r/w to map the vectors page.
	 */
	if (cpu_arch == CPU_ARCH_ARMv6)
		vecs_pgprot |= L_PTE_MT_VECTORS;

	/*
	 * Check whether the CPU supports the PXN bit in the
	 * Short-descriptor translation table format descriptors.
	 */
	if (cpu_arch == CPU_ARCH_ARMv7 &&
	    (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) {
		user_pmd_table |= PMD_PXNTABLE;
	}
#endif

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		/*
		 * If the initial page tables were created with the S bit
		 * set, then we need to do the same here for the same
		 * reasons given in early_cachepolicy().
		 */
		if (initial_pmd_value & PMD_SECT_S) {
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		if (mem_types[i].prot_sect)
			mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;

	/*
	 * Set PXN for user mappings
	 */
	user_pgprot |= PTE_EXT_PXN;
#endif
	for (i = 0; i < 16; i++) {
		pteval_t v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	pr_info("Memory policy: %sData cache %s\n",
		ecc_mask ? "ECC enabled, " : "", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];

		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}
#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif

#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
static void __init *early_alloc(unsigned long sz)
{
	void *ptr = memblock_alloc(sz, sz);

	if (!ptr)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, sz, sz);

	return ptr;
}

static void *__init late_alloc(unsigned long sz)
{
	void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM,
				       get_order(sz));

	if (!ptdesc || !pagetable_pte_ctor(ptdesc))
		BUG();
	return ptdesc_to_virt(ptdesc);
}
static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr,
				    unsigned long prot,
				    void *(*alloc)(unsigned long sz))
{
	if (pmd_none(*pmd)) {
		pte_t *pte = alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}

static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr,
				      unsigned long prot)
{
	return arm_pte_alloc(pmd, addr, prot, early_alloc);
}
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type,
				  void *(*alloc)(unsigned long sz),
				  bool ng)
{
	pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc);

	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
			    ng ? PTE_EXT_NG : 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}
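
/*
 * Note on the alloc callback used above and below: boot-time mappings pass
 * early_alloc() (memblock), while create_mapping_late() passes late_alloc(),
 * which takes page-table pages from the page allocator once the core MM is
 * up.
 */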
static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type, bool ng)
{
	pmd_t *p = pmd;

#ifndef CONFIG_ARM_LPAE
	/*
	 * In classic MMU format, puds and pmds are folded in to
	 * the pgds. pmd_offset gives the PGD entry. PGDs refer to a
	 * group of L1 entries making up one logical pointer to
	 * an L2 table (2MB), where as PMDs refer to the individual
	 * L1 entries (1MB). Hence increment to get the correct
	 * offset for odd 1MB sections.
	 * (See arch/arm/include/asm/pgtable-2level.h)
	 */
	if (addr & SECTION_SIZE)
		pmd++;
#endif
	do {
		*pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0));
		phys += SECTION_SIZE;
	} while (pmd++, addr += SECTION_SIZE, addr != end);

	flush_pmd_entry(p);
}
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type,
				  void *(*alloc)(unsigned long sz), bool ng)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	unsigned long next;

	do {
		/*
		 * With LPAE, we must loop over to map
		 * all the pmds for the given range.
		 */
		next = pmd_addr_end(addr, end);

		/*
		 * Try a section mapping - addr, next and phys must all be
		 * aligned to a section boundary.
		 */
		if (type->prot_sect &&
		    ((addr | next | phys) & ~SECTION_MASK) == 0) {
			__map_init_section(pmd, addr, next, phys, type, ng);
		} else {
			alloc_init_pte(pmd, addr, next,
				       __phys_to_pfn(phys), type, alloc, ng);
		}

		phys += next - addr;

	} while (pmd++, addr = next, addr != end);
}
static void __init alloc_init_pud(p4d_t *p4d, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type,
				  void *(*alloc)(unsigned long sz), bool ng)
{
	pud_t *pud = pud_offset(p4d, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_pmd(pud, addr, next, phys, type, alloc, ng);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}
static void __init alloc_init_p4d(pgd_t *pgd, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type,
				  void *(*alloc)(unsigned long sz), bool ng)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	unsigned long next;

	do {
		next = p4d_addr_end(addr, end);
		alloc_init_pud(p4d, addr, next, phys, type, alloc, ng);
		phys += next - addr;
	} while (p4d++, addr = next, addr != end);
}
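
/*
 * __create_mapping() below walks pgd -> p4d -> pud -> pmd and finally either
 * writes section entries or falls back to individual ptes, so the
 * alloc_init_*() helpers above mirror the page-table hierarchy one level at
 * a time.
 */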
#ifndef CONFIG_ARM_LPAE
static void __init create_36bit_mapping(struct mm_struct *mm,
					struct map_desc *md,
					const struct mem_type *type,
					bool ng)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to insure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset(mm, addr);
	end = addr + length;
	do {
		p4d_t *p4d = p4d_offset(pgd, addr);
		pud_t *pud = pud_offset(p4d, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER |
				       (ng ? PMD_SECT_nG : 0));

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}
#endif	/* !CONFIG_ARM_LPAE */
static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md,
				    void *(*alloc)(unsigned long sz),
				    bool ng)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	type = &mem_types[md->type];

#ifndef CONFIG_ARM_LPAE
	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(mm, md, type, ng);
		return;
	}
#endif

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
			(long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset(mm, addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_p4d(pgd, addr, next, phys, type, alloc, ng);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}
/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if (md->type == MT_DEVICE &&
	    md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	__create_mapping(&init_mm, md, early_alloc, false);
}
void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md,
				bool ng)
{
#ifdef CONFIG_ARM_LPAE
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_alloc(mm, pgd_offset(mm, md->virtual), md->virtual);
	if (WARN_ON(!p4d))
		return;
	pud = pud_alloc(mm, p4d, md->virtual);
	if (WARN_ON(!pud))
		return;
	pmd_alloc(mm, pud, 0);
#endif
	__create_mapping(mm, md, late_alloc, ng);
}
/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	struct map_desc *md;
	struct vm_struct *vm;
	struct static_vm *svm;

	if (!nr)
		return;

	svm = memblock_alloc(sizeof(*svm) * nr, __alignof__(*svm));
	if (!svm)
		panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
		      __func__, sizeof(*svm) * nr, __alignof__(*svm));

	for (md = io_desc; nr; md++, nr--) {
		create_mapping(md);

		vm = &svm->vm;
		vm->addr = (void *)(md->virtual & PAGE_MASK);
		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
		vm->phys_addr = __pfn_to_phys(md->pfn);
		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
		vm->flags |= VM_ARM_MTYPE(md->type);
		vm->caller = iotable_init;
		add_static_vm_early(svm++);
	}
}
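
/*
 * Illustrative board-level usage (a sketch with made-up addresses, not taken
 * from this file):
 *
 *	static struct map_desc board_io_desc[] __initdata = {
 *		{
 *			.virtual = 0xf8000000,			// hypothetical virtual base
 *			.pfn	 = __phys_to_pfn(0x10000000),	// hypothetical physical base
 *			.length	 = SZ_1M,
 *			.type	 = MT_DEVICE,
 *		},
 *	};
 *
 *	iotable_init(board_io_desc, ARRAY_SIZE(board_io_desc));
 *
 * typically called from the machine's ->map_io() hook.
 */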
void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
				  void *caller)
{
	struct vm_struct *vm;
	struct static_vm *svm;

	svm = memblock_alloc(sizeof(*svm), __alignof__(*svm));
	if (!svm)
		panic("%s: Failed to allocate %zu bytes align=0x%zx\n",
		      __func__, sizeof(*svm), __alignof__(*svm));

	vm = &svm->vm;
	vm->addr = (void *)addr;
	vm->size = size;
	vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
	vm->caller = caller;
	add_static_vm_early(svm);
}
#ifndef CONFIG_ARM_LPAE

/*
 * The Linux PMD is made of two consecutive section entries covering 2MB
 * (see definition in include/asm/pgtable-2level.h).  However a call to
 * create_mapping() may optimize static mappings by using individual
 * 1MB section mappings.  This leaves the actual PMD potentially half
 * initialized if the top or bottom section entry isn't used, leaving it
 * open to problems if a subsequent ioremap() or vmalloc() tries to use
 * the virtual space left free by that unused section entry.
 *
 * Let's avoid the issue by inserting dummy vm entries covering the unused
 * PMD halves once the static mappings are in place.
 */

static void __init pmd_empty_section_gap(unsigned long addr)
{
	vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
}
static void __init fill_pmd_gaps(void)
{
	struct static_vm *svm;
	struct vm_struct *vm;
	unsigned long addr, next = 0;
	pmd_t *pmd;

	list_for_each_entry(svm, &static_vmlist, list) {
		vm = &svm->vm;
		addr = (unsigned long)vm->addr;
		if (addr < next)
			continue;

		/*
		 * Check if this vm starts on an odd section boundary.
		 * If so and the first section entry for this PMD is free
		 * then we block the corresponding virtual address.
		 */
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr);
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr & PMD_MASK);
		}

		/*
		 * Then check if this vm ends on an odd section boundary.
		 * If so and the second section entry for this PMD is empty
		 * then we block the corresponding virtual address.
		 */
		addr += vm->size;
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr) + 1;
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr);
		}

		/* no need to look at any vm entry until we hit the next PMD */
		next = (addr + PMD_SIZE - 1) & PMD_MASK;
	}
}

#else
#define fill_pmd_gaps() do { } while (0)
#endif
#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
static void __init pci_reserve_io(void)
{
	struct static_vm *svm;

	svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE);
	if (svm)
		return;

	vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
}
#else
#define pci_reserve_io() do { } while (0)
#endif
#ifdef CONFIG_DEBUG_LL
void __init debug_ll_io_init(void)
{
	struct map_desc map;

	debug_ll_addr(&map.pfn, &map.virtual);
	if (!map.pfn || !map.virtual)
		return;
	map.pfn = __phys_to_pfn(map.pfn);
	map.virtual &= PAGE_MASK;
	map.length = PAGE_SIZE;
	map.type = MT_DEVICE;
	iotable_init(&map, 1);
}
#endif

static unsigned long __initdata vmalloc_size = 240 * SZ_1M;
/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 240MiB.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);
	unsigned long vmalloc_max;

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		pr_warn("vmalloc area is too small, limiting to %luMiB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_max = VMALLOC_END - (PAGE_OFFSET + SZ_32M + VMALLOC_OFFSET);
	if (vmalloc_reserve > vmalloc_max) {
		vmalloc_reserve = vmalloc_max;
		pr_warn("vmalloc area is too big, limiting to %luMiB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_size = vmalloc_reserve;
	return 0;
}
early_param("vmalloc", early_vmalloc);
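
/*
 * Example (illustrative): booting with "vmalloc=512M" asks for a 512 MiB
 * vmalloc area instead of the 240 MiB default, clamped to the limits
 * computed above.
 */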
phys_addr_t arm_lowmem_limit __initdata = 0;

void __init adjust_lowmem_bounds(void)
{
	phys_addr_t block_start, block_end, memblock_limit = 0;
	u64 vmalloc_limit, i;
	phys_addr_t lowmem_limit = 0;

	/*
	 * Let's use our own (unoptimized) equivalent of __pa() that is
	 * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
	 * The result is used as the upper bound on physical memory address
	 * and may itself be outside the valid range for which phys_addr_t
	 * and therefore __pa() is defined.
	 */
	vmalloc_limit = (u64)VMALLOC_END - vmalloc_size - VMALLOC_OFFSET -
			PAGE_OFFSET + PHYS_OFFSET;

	/*
	 * The first usable region must be PMD aligned. Mark its start
	 * as MEMBLOCK_NOMAP if it isn't
	 */
	for_each_mem_range(i, &block_start, &block_end) {
		if (!IS_ALIGNED(block_start, PMD_SIZE)) {
			phys_addr_t len;

			len = round_up(block_start, PMD_SIZE) - block_start;
			memblock_mark_nomap(block_start, len);
		}
		break;
	}

	for_each_mem_range(i, &block_start, &block_end) {
		if (block_start < vmalloc_limit) {
			if (block_end > lowmem_limit)
				/*
				 * Compare as u64 to ensure vmalloc_limit does
				 * not get truncated. block_end should always
				 * fit in phys_addr_t so there should be no
				 * issue with assignment.
				 */
				lowmem_limit = min_t(u64,
						     vmalloc_limit,
						     block_end);

			/*
			 * Find the first non-pmd-aligned page, and point
			 * memblock_limit at it. This relies on rounding the
			 * limit down to be pmd-aligned, which happens at the
			 * end of this function.
			 *
			 * With this algorithm, the start or end of almost any
			 * bank can be non-pmd-aligned. The only exception is
			 * that the start of the bank 0 must be section-
			 * aligned, since otherwise memory would need to be
			 * allocated when mapping the start of bank 0, which
			 * occurs before any free memory is mapped.
			 */
			if (!memblock_limit) {
				if (!IS_ALIGNED(block_start, PMD_SIZE))
					memblock_limit = block_start;
				else if (!IS_ALIGNED(block_end, PMD_SIZE))
					memblock_limit = lowmem_limit;
			}
		}
	}

	arm_lowmem_limit = lowmem_limit;

	high_memory = __va(arm_lowmem_limit - 1) + 1;

	if (!memblock_limit)
		memblock_limit = arm_lowmem_limit;

	/*
	 * Round the memblock limit down to a pmd size.  This
	 * helps to ensure that we will allocate memory from the
	 * last full pmd, which should be mapped.
	 */
	memblock_limit = round_down(memblock_limit, PMD_SIZE);

	if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
		if (memblock_end_of_DRAM() > arm_lowmem_limit) {
			phys_addr_t end = memblock_end_of_DRAM();

			pr_notice("Ignoring RAM at %pa-%pa\n",
				  &memblock_limit, &end);
			pr_notice("Consider using a HIGHMEM enabled kernel.\n");

			memblock_remove(memblock_limit, end - memblock_limit);
		}
	}

	memblock_set_current_limit(memblock_limit);
}
static __init void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
#ifdef CONFIG_KASAN
	/*
	 * KASan's shadow memory inserts itself between the TASK_SIZE
	 * and MODULES_VADDR. Do not clear the KASan shadow memory mappings.
	 */
	for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
	/*
	 * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes
	 * equal to MODULES_VADDR and then we exit the pmd clearing. If we
	 * are using a thumb-compiled kernel, there will be 8MB more
	 * to clear as KASan always offsets to 16 MB below MODULES_VADDR.
	 */
	for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
#else
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
#endif

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= arm_lowmem_limit)
		end = arm_lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_START; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}
#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}
/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_START, except early fixmap, we might remove debug
 * device mappings.  This means earlycon can be used to debug this function.
 * Any other function or debugging method which may touch any device _will_
 * crash the kernel.
 */
static void __init devicemaps_init(const struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;
	void *vectors;

	/*
	 * Allocate the vector page early.
	 */
	vectors = early_alloc(PAGE_SIZE * 2);

	early_trap_init(vectors);

	/*
	 * Clear page table except top pmd used by early fixmaps
	 */
	for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	if (__atags_pointer) {
		/* create a read-only mapping of the device tree */
		map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
		map.virtual = FDT_FIXED_BASE;
		map.length = FDT_FIXED_SIZE;
		map.type = MT_MEMORY_RO;
		create_mapping(&map);
	}

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
#ifdef CONFIG_KUSER_HELPERS
	map.type = MT_HIGH_VECTORS;
#else
	map.type = MT_LOW_VECTORS;
#endif
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.length = PAGE_SIZE * 2;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/* Now create a kernel read-only mapping */
	map.pfn += 1;
	map.virtual = 0xffff0000 + PAGE_SIZE;
	map.length = PAGE_SIZE;
	map.type = MT_LOW_VECTORS;
	create_mapping(&map);

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();
	else
		debug_ll_io_init();
	fill_pmd_gaps();

	/* Reserve fixed i/o space in VMALLOC region */
	pci_reserve_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();

	/* Enable asynchronous aborts */
	early_abt_enable();
}
static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif

	early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,
			_PAGE_KERNEL_TABLE);
}
static void __init map_lowmem(void)
{
	phys_addr_t start, end;
	u64 i;

	/* Map all the lowmem memory banks. */
	for_each_mem_range(i, &start, &end) {
		struct map_desc map;

		pr_debug("map lowmem start: 0x%08llx, end: 0x%08llx\n",
			 (long long)start, (long long)end);
		if (end > arm_lowmem_limit)
			end = arm_lowmem_limit;
		if (start >= end)
			break;

		/*
		 * If our kernel image is in the VMALLOC area we need to remove
		 * the kernel physical memory from lowmem since the kernel will
		 * be mapped separately.
		 *
		 * The kernel will typically be at the very start of lowmem,
		 * but any placement relative to memory ranges is possible.
		 *
		 * If the memblock contains the kernel, we have to chisel out
		 * the kernel memory from it and map each part separately. We
		 * get 6 different theoretical cases:
		 *
		 *                            +--------+ +--------+
		 *  +-- start --+  +--------+ | Kernel | | Kernel |
		 *  |           |  | Kernel | | case 2 | | case 5 |
		 *  |           |  | case 1 | +--------+ |        | +--------+
		 *  |  Memory   |  +--------+            |        | | Kernel |
		 *  |  range    |  +--------+            |        | | case 6 |
		 *  |           |  | Kernel | +--------+ |        | +--------+
		 *  |           |  | case 3 | | Kernel | |        |
		 *  +-- end ----+  +--------+ | case 4 | |        |
		 *                            +--------+ +--------+
		 */

		/* Case 5: kernel covers range, don't map anything, should be rare */
		if ((start > kernel_sec_start) && (end < kernel_sec_end))
			break;

		/* Cases where the kernel is starting inside the range */
		if ((kernel_sec_start >= start) && (kernel_sec_start <= end)) {
			/* Case 6: kernel is embedded in the range, we need two mappings */
			if ((start < kernel_sec_start) && (end > kernel_sec_end)) {
				/* Map memory below the kernel */
				map.pfn = __phys_to_pfn(start);
				map.virtual = __phys_to_virt(start);
				map.length = kernel_sec_start - start;
				map.type = MT_MEMORY_RW;
				create_mapping(&map);
				/* Map memory above the kernel */
				map.pfn = __phys_to_pfn(kernel_sec_end);
				map.virtual = __phys_to_virt(kernel_sec_end);
				map.length = end - kernel_sec_end;
				map.type = MT_MEMORY_RW;
				create_mapping(&map);
				break;
			}
			/* Case 1: kernel and range start at the same address, should be common */
			if (kernel_sec_start == start)
				start = kernel_sec_end;
			/* Case 3: kernel and range end at the same address, should be rare */
			if (kernel_sec_end == end)
				end = kernel_sec_start;
		} else if ((kernel_sec_start < start) && (kernel_sec_end > start) && (kernel_sec_end < end)) {
			/* Case 2: kernel ends inside range, starts below it */
			start = kernel_sec_end;
		} else if ((kernel_sec_start > start) && (kernel_sec_start < end) && (kernel_sec_end > end)) {
			/* Case 4: kernel starts inside range, ends above it */
			end = kernel_sec_start;
		}
		map.pfn = __phys_to_pfn(start);
		map.virtual = __phys_to_virt(start);
		map.length = end - start;
		map.type = MT_MEMORY_RW;
		create_mapping(&map);
	}
}
static void __init map_kernel(void)
{
	/*
	 * We use the well known kernel section start and end and split the area in the
	 * middle like this:
	 *  .                .
	 *  | RW memory      |
	 *  +----------------+ kernel_x_start
	 *  | Executable     |
	 *  | kernel memory  |
	 *  +----------------+ kernel_x_end / kernel_nx_start
	 *  | Non-executable |
	 *  | kernel memory  |
	 *  +----------------+ kernel_nx_end
	 *  | RW memory      |
	 *  .                .
	 *
	 * Notice that we are dealing with section sized mappings here so all of this
	 * will be bumped to the closest section boundary. This means that some of the
	 * non-executable part of the kernel memory is actually mapped as executable.
	 * This will only persist until we turn on proper memory management later on
	 * and we remap the whole kernel with page granularity.
	 */
#ifdef CONFIG_XIP_KERNEL
	phys_addr_t kernel_nx_start = kernel_sec_start;
#else
	phys_addr_t kernel_x_start = kernel_sec_start;
	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
	phys_addr_t kernel_nx_start = kernel_x_end;
#endif
	phys_addr_t kernel_nx_end = kernel_sec_end;
	struct map_desc map;

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the modulearea.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#else
	map.pfn = __phys_to_pfn(kernel_x_start);
	map.virtual = __phys_to_virt(kernel_x_start);
	map.length = kernel_x_end - kernel_x_start;
	map.type = MT_MEMORY_RWX;
	create_mapping(&map);

	/* If the nx part is small it may end up covered by the tail of the RWX section */
	if (kernel_x_end == kernel_nx_end)
		return;
#endif
	map.pfn = __phys_to_pfn(kernel_nx_start);
	map.virtual = __phys_to_virt(kernel_nx_start);
	map.length = kernel_nx_end - kernel_nx_start;
	map.type = MT_MEMORY_RW;
	create_mapping(&map);
}
#ifdef CONFIG_ARM_PV_FIXUP
typedef void pgtables_remap(long long offset, unsigned long pgd);
pgtables_remap lpae_pgtables_remap_asm;
/*
 * early_paging_init() recreates boot time page table setup, allowing machines
 * to switch over to a high (>4G) address space on LPAE systems
 */
static void __init early_paging_init(const struct machine_desc *mdesc)
{
	pgtables_remap *lpae_pgtables_remap;
	unsigned long pa_pgd;
	unsigned long cr, ttbcr, tmp;
	long long offset;

	if (!mdesc->pv_fixup)
		return;

	offset = mdesc->pv_fixup();
	if (offset == 0)
		return;

	/*
	 * Offset the kernel section physical offsets so that the kernel
	 * mapping will work out later on.
	 */
	kernel_sec_start += offset;
	kernel_sec_end += offset;

	/*
	 * Get the address of the remap function in the 1:1 identity
	 * mapping setup by the early page table assembly code.  We
	 * must get this prior to the pv update.  The following barrier
	 * ensures that this is complete before we fixup any P:V offsets.
	 */
	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
	pa_pgd = __pa(swapper_pg_dir);
	barrier();

	pr_info("Switching physical address space to 0x%08llx\n",
		(u64)PHYS_OFFSET + offset);

	/* Re-set the phys pfn offset, and the pv offset */
	__pv_offset += offset;
	__pv_phys_pfn_offset += PFN_DOWN(offset);

	/* Run the patch stub to update the constants */
	fixup_pv_table(&__pv_table_begin,
		(&__pv_table_end - &__pv_table_begin) << 2);

	/*
	 * We are changing not only the virtual to physical mapping, but also
	 * the physical addresses used to access memory.  We need to flush
	 * all levels of cache in the system with caching disabled to
	 * ensure that all data is written back, and nothing is prefetched
	 * into the caches.  We also need to prevent the TLB walkers
	 * allocating into the caches too.  Note that this is ARMv7 LPAE
	 * specific.
	 */
	cr = get_cr();
	set_cr(cr & ~(CR_I | CR_C));
	ttbcr = cpu_get_ttbcr();
	/* Disable all kind of caching of the translation table */
	tmp = ttbcr & ~(TTBCR_ORGN0_MASK | TTBCR_IRGN0_MASK);
	cpu_set_ttbcr(tmp);
	flush_cache_all();

	/*
	 * Fixup the page tables - this must be in the idmap region as
	 * we need to disable the MMU to do this safely, and hence it
	 * needs to be assembly.  It's fairly simple, as we're using the
	 * temporary tables setup by the initial assembly code.
	 */
	lpae_pgtables_remap(offset, pa_pgd);

	/* Re-enable the caches and cacheable TLB walks */
	cpu_set_ttbcr(ttbcr);
	set_cr(cr);
}
#else

static void __init early_paging_init(const struct machine_desc *mdesc)
{
	long long offset;

	if (!mdesc->pv_fixup)
		return;

	offset = mdesc->pv_fixup();
	if (offset == 0)
		return;

	pr_crit("Physical address space modification is only to support Keystone2.\n");
	pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
	pr_crit("feature. Your kernel may crash now, have a good day.\n");
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
}

#endif
static void __init early_fixmap_shutdown(void)
{
	int i;
	unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1);

	pte_offset_fixmap = pte_offset_late_fixmap;
	pmd_clear(fixmap_pmd(va));
	local_flush_tlb_kernel_page(va);

	for (i = 0; i < __end_of_permanent_fixed_addresses; i++) {
		pte_t *pte;
		struct map_desc map;

		map.virtual = fix_to_virt(i);
		pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual);

		/* Only i/o device mappings are supported ATM */
		if (pte_none(*pte) ||
		    (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED)
			continue;

		map.pfn = pte_pfn(*pte);
		map.type = MT_DEVICE;
		map.length = PAGE_SIZE;

		create_mapping(&map);
	}
}
/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(const struct machine_desc *mdesc)
{
	void *zero_page;

#ifdef CONFIG_XIP_KERNEL
	/* Store the kernel RW RAM region start/end in these variables */
	kernel_sec_start = CONFIG_PHYS_OFFSET & SECTION_MASK;
	kernel_sec_end = round_up(__pa(_end), SECTION_SIZE);
#endif
	pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n",
		 kernel_sec_start, kernel_sec_end);

	prepare_page_table();
	map_lowmem();
	memblock_set_current_limit(arm_lowmem_limit);
	pr_debug("lowmem limit is %08llx\n", (long long)arm_lowmem_limit);
	/*
	 * After this point early_alloc(), i.e. the memblock allocator, can
	 * be used
	 */
	map_kernel();
	dma_contiguous_remap();
	early_fixmap_shutdown();
	devicemaps_init(mdesc);
	kmap_init();
	tcm_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_folio(NULL, page_folio(empty_zero_page));
}
void __init early_mm_init(const struct machine_desc *mdesc)
{
	build_mem_type_table();
	early_paging_init(mdesc);
}
void set_ptes(struct mm_struct *mm, unsigned long addr,
	      pte_t *ptep, pte_t pteval, unsigned int nr)
{
	unsigned long ext = 0;

	if (addr < TASK_SIZE && pte_valid_user(pteval)) {
		if (!pte_special(pteval))
			__sync_icache_dcache(pteval);
		ext |= PTE_EXT_NG;
	}

	for (;;) {
		set_pte_ext(ptep, pteval, ext);
		if (--nr == 0)
			break;
		ptep++;
		pteval = pte_next_pfn(pteval);
	}
}