// SPDX-License-Identifier: GPL-2.0-only
/*
 * Based on arch/arm/mm/init.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/cache.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>

#include <asm/boot.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <linux/sizes.h>
#include <asm/tlb.h>
#include <asm/alternative.h>
45 #define ARM64_ZONE_DMA_BITS 30
48 * We need to be able to catch inadvertent references to memstart_addr
49 * that occur (potentially in generic code) before arm64_memblock_init()
50 * executes, which assigns it its actual value. So use a default value
51 * that cannot be mistaken for a real physical address.
53 s64 memstart_addr __ro_after_init
= -1;
54 EXPORT_SYMBOL(memstart_addr
);
56 s64 physvirt_offset __ro_after_init
;
57 EXPORT_SYMBOL(physvirt_offset
);
59 struct page
*vmemmap __ro_after_init
;
60 EXPORT_SYMBOL(vmemmap
);
63 * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
64 * memory as some devices, namely the Raspberry Pi 4, have peripherals with
65 * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
66 * bit addressable memory area.
68 phys_addr_t arm64_dma_phys_limit __ro_after_init
;
69 static phys_addr_t arm64_dma32_phys_limit __ro_after_init
;
71 #ifdef CONFIG_KEXEC_CORE
73 * reserve_crashkernel() - reserves memory for crash kernel
75 * This function reserves memory area given in "crashkernel=" kernel command
76 * line parameter. The memory reserved is used by dump capture kernel when
77 * primary kernel is crashing.
79 static void __init
reserve_crashkernel(void)
81 unsigned long long crash_base
, crash_size
;
84 ret
= parse_crashkernel(boot_command_line
, memblock_phys_mem_size(),
85 &crash_size
, &crash_base
);
86 /* no crashkernel= or invalid value specified */
87 if (ret
|| !crash_size
)
90 crash_size
= PAGE_ALIGN(crash_size
);
92 if (crash_base
== 0) {
93 /* Current arm64 boot protocol requires 2MB alignment */
94 crash_base
= memblock_find_in_range(0, arm64_dma32_phys_limit
,
96 if (crash_base
== 0) {
97 pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
102 /* User specifies base address explicitly. */
103 if (!memblock_is_region_memory(crash_base
, crash_size
)) {
104 pr_warn("cannot reserve crashkernel: region is not memory\n");
108 if (memblock_is_region_reserved(crash_base
, crash_size
)) {
109 pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
113 if (!IS_ALIGNED(crash_base
, SZ_2M
)) {
114 pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
118 memblock_reserve(crash_base
, crash_size
);
120 pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
121 crash_base
, crash_base
+ crash_size
, crash_size
>> 20);
123 crashk_res
.start
= crash_base
;
124 crashk_res
.end
= crash_base
+ crash_size
- 1;
127 static void __init
reserve_crashkernel(void)
130 #endif /* CONFIG_KEXEC_CORE */
132 #ifdef CONFIG_CRASH_DUMP
133 static int __init
early_init_dt_scan_elfcorehdr(unsigned long node
,
134 const char *uname
, int depth
, void *data
)
139 if (depth
!= 1 || strcmp(uname
, "chosen") != 0)
142 reg
= of_get_flat_dt_prop(node
, "linux,elfcorehdr", &len
);
143 if (!reg
|| (len
< (dt_root_addr_cells
+ dt_root_size_cells
)))
146 elfcorehdr_addr
= dt_mem_next_cell(dt_root_addr_cells
, ®
);
147 elfcorehdr_size
= dt_mem_next_cell(dt_root_size_cells
, ®
);
153 * reserve_elfcorehdr() - reserves memory for elf core header
155 * This function reserves the memory occupied by an elf core header
156 * described in the device tree. This region contains all the
157 * information about primary kernel's core image and is used by a dump
158 * capture kernel to access the system memory on primary kernel.
160 static void __init
reserve_elfcorehdr(void)
162 of_scan_flat_dt(early_init_dt_scan_elfcorehdr
, NULL
);
164 if (!elfcorehdr_size
)
167 if (memblock_is_region_reserved(elfcorehdr_addr
, elfcorehdr_size
)) {
168 pr_warn("elfcorehdr is overlapped\n");
172 memblock_reserve(elfcorehdr_addr
, elfcorehdr_size
);
174 pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
175 elfcorehdr_size
>> 10, elfcorehdr_addr
);
178 static void __init
reserve_elfcorehdr(void)
181 #endif /* CONFIG_CRASH_DUMP */
184 * Return the maximum physical address for a zone with a given address size
185 * limit. It currently assumes that for memory starting above 4G, 32-bit
186 * devices will use a DMA offset.
188 static phys_addr_t __init
max_zone_phys(unsigned int zone_bits
)
190 phys_addr_t offset
= memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits
);
191 return min(offset
+ (1ULL << zone_bits
), memblock_end_of_DRAM());
196 static void __init
zone_sizes_init(unsigned long min
, unsigned long max
)
198 unsigned long max_zone_pfns
[MAX_NR_ZONES
] = {0};
200 #ifdef CONFIG_ZONE_DMA
201 max_zone_pfns
[ZONE_DMA
] = PFN_DOWN(arm64_dma_phys_limit
);
203 #ifdef CONFIG_ZONE_DMA32
204 max_zone_pfns
[ZONE_DMA32
] = PFN_DOWN(arm64_dma32_phys_limit
);
206 max_zone_pfns
[ZONE_NORMAL
] = max
;
208 free_area_init_nodes(max_zone_pfns
);
213 static void __init
zone_sizes_init(unsigned long min
, unsigned long max
)
215 struct memblock_region
*reg
;
216 unsigned long zone_size
[MAX_NR_ZONES
], zhole_size
[MAX_NR_ZONES
];
217 unsigned long __maybe_unused max_dma
, max_dma32
;
219 memset(zone_size
, 0, sizeof(zone_size
));
221 max_dma
= max_dma32
= min
;
222 #ifdef CONFIG_ZONE_DMA
223 max_dma
= max_dma32
= PFN_DOWN(arm64_dma_phys_limit
);
224 zone_size
[ZONE_DMA
] = max_dma
- min
;
226 #ifdef CONFIG_ZONE_DMA32
227 max_dma32
= PFN_DOWN(arm64_dma32_phys_limit
);
228 zone_size
[ZONE_DMA32
] = max_dma32
- max_dma
;
230 zone_size
[ZONE_NORMAL
] = max
- max_dma32
;
232 memcpy(zhole_size
, zone_size
, sizeof(zhole_size
));
234 for_each_memblock(memory
, reg
) {
235 unsigned long start
= memblock_region_memory_base_pfn(reg
);
236 unsigned long end
= memblock_region_memory_end_pfn(reg
);
238 #ifdef CONFIG_ZONE_DMA
239 if (start
>= min
&& start
< max_dma
) {
240 unsigned long dma_end
= min(end
, max_dma
);
241 zhole_size
[ZONE_DMA
] -= dma_end
- start
;
245 #ifdef CONFIG_ZONE_DMA32
246 if (start
>= max_dma
&& start
< max_dma32
) {
247 unsigned long dma32_end
= min(end
, max_dma32
);
248 zhole_size
[ZONE_DMA32
] -= dma32_end
- start
;
252 if (start
>= max_dma32
&& start
< max
) {
253 unsigned long normal_end
= min(end
, max
);
254 zhole_size
[ZONE_NORMAL
] -= normal_end
- start
;
258 free_area_init_node(0, zone_size
, min
, zhole_size
);
261 #endif /* CONFIG_NUMA */
263 int pfn_valid(unsigned long pfn
)
265 phys_addr_t addr
= pfn
<< PAGE_SHIFT
;
267 if ((addr
>> PAGE_SHIFT
) != pfn
)
270 #ifdef CONFIG_SPARSEMEM
271 if (pfn_to_section_nr(pfn
) >= NR_MEM_SECTIONS
)
274 if (!valid_section(__nr_to_section(pfn_to_section_nr(pfn
))))
277 return memblock_is_map_memory(addr
);
279 EXPORT_SYMBOL(pfn_valid
);
281 static phys_addr_t memory_limit
= PHYS_ADDR_MAX
;
284 * Limit the memory size that was specified via FDT.
286 static int __init
early_mem(char *p
)
291 memory_limit
= memparse(p
, &p
) & PAGE_MASK
;
292 pr_notice("Memory limited to %lldMB\n", memory_limit
>> 20);
296 early_param("mem", early_mem
);
298 static int __init
early_init_dt_scan_usablemem(unsigned long node
,
299 const char *uname
, int depth
, void *data
)
301 struct memblock_region
*usablemem
= data
;
305 if (depth
!= 1 || strcmp(uname
, "chosen") != 0)
308 reg
= of_get_flat_dt_prop(node
, "linux,usable-memory-range", &len
);
309 if (!reg
|| (len
< (dt_root_addr_cells
+ dt_root_size_cells
)))
312 usablemem
->base
= dt_mem_next_cell(dt_root_addr_cells
, ®
);
313 usablemem
->size
= dt_mem_next_cell(dt_root_size_cells
, ®
);
318 static void __init
fdt_enforce_memory_region(void)
320 struct memblock_region reg
= {
324 of_scan_flat_dt(early_init_dt_scan_usablemem
, ®
);
327 memblock_cap_memory_range(reg
.base
, reg
.size
);
330 void __init
arm64_memblock_init(void)
332 const s64 linear_region_size
= BIT(vabits_actual
- 1);
334 /* Handle linux,usable-memory-range property */
335 fdt_enforce_memory_region();
337 /* Remove memory above our supported physical address size */
338 memblock_remove(1ULL << PHYS_MASK_SHIFT
, ULLONG_MAX
);
341 * Select a suitable value for the base of physical memory.
343 memstart_addr
= round_down(memblock_start_of_DRAM(),
344 ARM64_MEMSTART_ALIGN
);
346 physvirt_offset
= PHYS_OFFSET
- PAGE_OFFSET
;
348 vmemmap
= ((struct page
*)VMEMMAP_START
- (memstart_addr
>> PAGE_SHIFT
));
351 * If we are running with a 52-bit kernel VA config on a system that
352 * does not support it, we have to offset our vmemmap and physvirt_offset
353 * s.t. we avoid the 52-bit portion of the direct linear map
355 if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52
) && (vabits_actual
!= 52)) {
356 vmemmap
+= (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT
;
357 physvirt_offset
= PHYS_OFFSET
- _PAGE_OFFSET(48);
361 * Remove the memory that we will not be able to cover with the
362 * linear mapping. Take care not to clip the kernel which may be
365 memblock_remove(max_t(u64
, memstart_addr
+ linear_region_size
,
366 __pa_symbol(_end
)), ULLONG_MAX
);
367 if (memstart_addr
+ linear_region_size
< memblock_end_of_DRAM()) {
368 /* ensure that memstart_addr remains sufficiently aligned */
369 memstart_addr
= round_up(memblock_end_of_DRAM() - linear_region_size
,
370 ARM64_MEMSTART_ALIGN
);
371 memblock_remove(0, memstart_addr
);
375 * Apply the memory limit if it was set. Since the kernel may be loaded
376 * high up in memory, add back the kernel region that must be accessible
377 * via the linear mapping.
379 if (memory_limit
!= PHYS_ADDR_MAX
) {
380 memblock_mem_limit_remove_map(memory_limit
);
381 memblock_add(__pa_symbol(_text
), (u64
)(_end
- _text
));
384 if (IS_ENABLED(CONFIG_BLK_DEV_INITRD
) && phys_initrd_size
) {
386 * Add back the memory we just removed if it results in the
387 * initrd to become inaccessible via the linear mapping.
388 * Otherwise, this is a no-op
390 u64 base
= phys_initrd_start
& PAGE_MASK
;
391 u64 size
= PAGE_ALIGN(phys_initrd_start
+ phys_initrd_size
) - base
;
394 * We can only add back the initrd memory if we don't end up
395 * with more memory than we can address via the linear mapping.
396 * It is up to the bootloader to position the kernel and the
397 * initrd reasonably close to each other (i.e., within 32 GB of
398 * each other) so that all granule/#levels combinations can
399 * always access both.
401 if (WARN(base
< memblock_start_of_DRAM() ||
402 base
+ size
> memblock_start_of_DRAM() +
404 "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
405 phys_initrd_size
= 0;
407 memblock_remove(base
, size
); /* clear MEMBLOCK_ flags */
408 memblock_add(base
, size
);
409 memblock_reserve(base
, size
);
413 if (IS_ENABLED(CONFIG_RANDOMIZE_BASE
)) {
414 extern u16 memstart_offset_seed
;
415 u64 range
= linear_region_size
-
416 (memblock_end_of_DRAM() - memblock_start_of_DRAM());
419 * If the size of the linear region exceeds, by a sufficient
420 * margin, the size of the region that the available physical
421 * memory spans, randomize the linear region as well.
423 if (memstart_offset_seed
> 0 && range
>= ARM64_MEMSTART_ALIGN
) {
424 range
/= ARM64_MEMSTART_ALIGN
;
425 memstart_addr
-= ARM64_MEMSTART_ALIGN
*
426 ((range
* memstart_offset_seed
) >> 16);
431 * Register the kernel text, kernel data, initrd, and initial
432 * pagetables with memblock.
434 memblock_reserve(__pa_symbol(_text
), _end
- _text
);
435 if (IS_ENABLED(CONFIG_BLK_DEV_INITRD
) && phys_initrd_size
) {
436 /* the generic initrd code expects virtual addresses */
437 initrd_start
= __phys_to_virt(phys_initrd_start
);
438 initrd_end
= initrd_start
+ phys_initrd_size
;
441 early_init_fdt_scan_reserved_mem();
443 if (IS_ENABLED(CONFIG_ZONE_DMA
)) {
444 zone_dma_bits
= ARM64_ZONE_DMA_BITS
;
445 arm64_dma_phys_limit
= max_zone_phys(ARM64_ZONE_DMA_BITS
);
448 if (IS_ENABLED(CONFIG_ZONE_DMA32
))
449 arm64_dma32_phys_limit
= max_zone_phys(32);
451 arm64_dma32_phys_limit
= PHYS_MASK
+ 1;
453 reserve_crashkernel();
455 reserve_elfcorehdr();
457 high_memory
= __va(memblock_end_of_DRAM() - 1) + 1;
459 dma_contiguous_reserve(arm64_dma32_phys_limit
);
462 void __init
bootmem_init(void)
464 unsigned long min
, max
;
466 min
= PFN_UP(memblock_start_of_DRAM());
467 max
= PFN_DOWN(memblock_end_of_DRAM());
469 early_memtest(min
<< PAGE_SHIFT
, max
<< PAGE_SHIFT
);
471 max_pfn
= max_low_pfn
= max
;
476 * Sparsemem tries to allocate bootmem in memory_present(), so must be
477 * done after the fixed reservations.
482 zone_sizes_init(min
, max
);
487 #ifndef CONFIG_SPARSEMEM_VMEMMAP
488 static inline void free_memmap(unsigned long start_pfn
, unsigned long end_pfn
)
490 struct page
*start_pg
, *end_pg
;
491 unsigned long pg
, pgend
;
494 * Convert start_pfn/end_pfn to a struct page pointer.
496 start_pg
= pfn_to_page(start_pfn
- 1) + 1;
497 end_pg
= pfn_to_page(end_pfn
- 1) + 1;
500 * Convert to physical addresses, and round start upwards and end
503 pg
= (unsigned long)PAGE_ALIGN(__pa(start_pg
));
504 pgend
= (unsigned long)__pa(end_pg
) & PAGE_MASK
;
507 * If there are free pages between these, free the section of the
511 memblock_free(pg
, pgend
- pg
);
515 * The mem_map array can get very big. Free the unused area of the memory map.
517 static void __init
free_unused_memmap(void)
519 unsigned long start
, prev_end
= 0;
520 struct memblock_region
*reg
;
522 for_each_memblock(memory
, reg
) {
523 start
= __phys_to_pfn(reg
->base
);
525 #ifdef CONFIG_SPARSEMEM
527 * Take care not to free memmap entries that don't exist due
528 * to SPARSEMEM sections which aren't present.
530 start
= min(start
, ALIGN(prev_end
, PAGES_PER_SECTION
));
533 * If we had a previous bank, and there is a space between the
534 * current bank and the previous, free it.
536 if (prev_end
&& prev_end
< start
)
537 free_memmap(prev_end
, start
);
540 * Align up here since the VM subsystem insists that the
541 * memmap entries are valid from the bank end aligned to
542 * MAX_ORDER_NR_PAGES.
544 prev_end
= ALIGN(__phys_to_pfn(reg
->base
+ reg
->size
),
548 #ifdef CONFIG_SPARSEMEM
549 if (!IS_ALIGNED(prev_end
, PAGES_PER_SECTION
))
550 free_memmap(prev_end
, ALIGN(prev_end
, PAGES_PER_SECTION
));
553 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
556 * mem_init() marks the free areas in the mem_map and tells us how much memory
557 * is free. This is done after various parts of the system have claimed their
558 * memory after the kernel image.
560 void __init
mem_init(void)
562 if (swiotlb_force
== SWIOTLB_FORCE
||
563 max_pfn
> PFN_DOWN(arm64_dma_phys_limit
? : arm64_dma32_phys_limit
))
566 swiotlb_force
= SWIOTLB_NO_FORCE
;
568 set_max_mapnr(max_pfn
- PHYS_PFN_OFFSET
);
570 #ifndef CONFIG_SPARSEMEM_VMEMMAP
571 free_unused_memmap();
573 /* this will put all unused low memory onto the freelists */
576 mem_init_print_info(NULL
);
579 * Check boundaries twice: Some fundamental inconsistencies can be
580 * detected at build time already.
583 BUILD_BUG_ON(TASK_SIZE_32
> DEFAULT_MAP_WINDOW_64
);
586 if (PAGE_SIZE
>= 16384 && get_num_physpages() <= 128) {
587 extern int sysctl_overcommit_memory
;
589 * On a machine this small we won't get anywhere without
590 * overcommit, so turn it on by default.
592 sysctl_overcommit_memory
= OVERCOMMIT_ALWAYS
;
596 void free_initmem(void)
598 free_reserved_area(lm_alias(__init_begin
),
599 lm_alias(__init_end
),
600 POISON_FREE_INITMEM
, "unused kernel");
602 * Unmap the __init region but leave the VM area in place. This
603 * prevents the region from being reused for kernel modules, which
604 * is not supported by kallsyms.
606 unmap_kernel_range((u64
)__init_begin
, (u64
)(__init_end
- __init_begin
));
610 * Dump out memory limit information on panic.
612 static int dump_mem_limit(struct notifier_block
*self
, unsigned long v
, void *p
)
614 if (memory_limit
!= PHYS_ADDR_MAX
) {
615 pr_emerg("Memory Limit: %llu MB\n", memory_limit
>> 20);
617 pr_emerg("Memory Limit: none\n");
622 static struct notifier_block mem_limit_notifier
= {
623 .notifier_call
= dump_mem_limit
,
626 static int __init
register_mem_limit_dumper(void)
628 atomic_notifier_chain_register(&panic_notifier_list
,
629 &mem_limit_notifier
);
632 __initcall(register_mem_limit_dumper
);