// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec;
 * instead, firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code implementation has been adapted from
 * the phyp assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
#include <linux/hugetlb.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_fdt.h>

#include <asm/page.h>
#include <asm/fadump.h>
#include <asm/fadump-internal.h>
#include <asm/setup.h>
#include <asm/interrupt.h>

/*
 * The CPU that acquired the lock to trigger the fadump crash should
 * wait for other CPUs to enter.
 *
 * The timeout is in milliseconds.
 */
#define CRASH_TIMEOUT		500

static struct fw_dump fw_dump;

static void __init fadump_reserve_crash_area(u64 base);

#ifndef CONFIG_PRESERVE_FA_DUMP

static struct kobject *fadump_kobj;

static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);

#define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
#define RESERVED_RNGS_CNT	(RESERVED_RNGS_SZ / \
				 sizeof(struct fadump_memory_range))
static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
static struct fadump_mrange_info
reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };

static void __init early_init_dt_scan_reserved_ranges(unsigned long node);

#ifdef CONFIG_CMA
static struct cma *fadump_cma;

/*
 * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
 *
 * This function initializes CMA area from fadump reserved memory.
 * The total size of fadump reserved memory covers for boot memory size
 * + cpu data size + hpte size and metadata.
 * Initialize only the area equivalent to boot memory size for CMA use.
 * The remaining portion of fadump reserved memory will not be given
 * to CMA and pages for those will stay reserved. boot memory size is
 * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
 * But for some reason even if it fails we still have the memory reservation
 * with us and we can still continue doing fadump.
 */
void __init fadump_cma_init(void)
{
	unsigned long long base, size, end;
	int rc;

	if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled ||
	    fw_dump.dump_active)
		return;
	/*
	 * Do not use CMA if user has provided fadump=nocma kernel parameter.
	 */
	if (fw_dump.nocma || !fw_dump.boot_memory_size)
		return;

	/*
	 * [base, end) should be reserved during early init in
	 * fadump_reserve_mem(). No need to check this here as
	 * cma_init_reserved_mem() already checks for overlap.
	 * Here we give the aligned chunk of this reserved memory to CMA.
	 */
	base = fw_dump.reserve_dump_area_start;
	size = fw_dump.boot_memory_size;
	end = base + size;

	base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES);
	end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES);
	size = end - base;

	if (!size) {
		pr_warn("%s: Too little memory to give to CMA\n", __func__);
		return;
	}

	rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
	if (rc) {
		pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
		/*
		 * Though the CMA init has failed, we still have the memory
		 * reservation with us. The reserved memory will be blocked
		 * from production system usage, so we can continue with
		 * fadump regardless.
		 */
		return;
	}

	/*
	 * If CMA activation fails, keep the pages reserved, instead of
	 * exposing them to buddy allocator. Same as 'fadump=nocma' case.
	 */
	cma_reserve_pages_on_error(fadump_cma);

	/*
	 * So we now have successfully initialized cma area for fadump.
	 */
	pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] "
		"bytes of memory reserved for firmware-assisted dump\n",
		cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20,
		fw_dump.reserve_dump_area_start,
		fw_dump.boot_memory_size >> 20);
}
#endif /* CONFIG_CMA */
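
/*
 * Worked example of the alignment above (illustrative values only): with
 * CMA_MIN_ALIGNMENT_BYTES at, say, 4MB, a reserved area starting at
 * base = 0x30100000 with a 1GB boot memory size gives
 *   base = ALIGN(0x30100000, 4MB)      = 0x30400000
 *   end  = ALIGN_DOWN(0x70100000, 4MB) = 0x70000000
 * so roughly 1GB minus the trimmed edges is handed to CMA; the trimmed
 * pages simply remain reserved.
 */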

/*
 * Additional parameters meant for capture kernel are placed in a dedicated area.
 * If this is a capture kernel boot, append these parameters to bootargs.
 */
void __init fadump_append_bootargs(void)
{
	char *append_args;
	size_t len;

	if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area)
		return;

	if (fw_dump.param_area < fw_dump.boot_mem_top) {
		if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) {
			pr_warn("WARNING: Can't use additional parameters area!\n");
			fw_dump.param_area = 0;
			return;
		}
	}

	append_args = (char *)fw_dump.param_area;
	len = strlen(boot_command_line);

	/*
	 * Too late to fail even if cmdline size exceeds. Truncate additional parameters
	 * to cmdline size and proceed anyway.
	 */
	if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1)
		pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n");

	pr_debug("Cmdline: %s\n", boot_command_line);
	snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args);

	pr_info("Updated cmdline: %s\n", boot_command_line);
}

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if (depth == 0) {
		early_init_dt_scan_reserved_ranges(node);
		return 0;
	}

	if (depth != 1)
		return 0;

	if (strcmp(uname, "rtas") == 0) {
		rtas_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	if (strcmp(uname, "ibm,opal") == 0) {
		opal_fadump_dt_scan(&fw_dump, node);
		return 1;
	}

	return 0;
}

/*
 * If fadump is registered, check if the memory provided
 * falls within boot memory area and reserved memory area.
 */
int is_fadump_memory_area(u64 addr, unsigned long size)
{
	u64 d_start, d_end;

	if (!fw_dump.dump_registered)
		return 0;

	if (!size)
		return 0;

	d_start = fw_dump.reserve_dump_area_start;
	d_end = d_start + fw_dump.reserve_dump_area_size;
	if (((addr + size) > d_start) && (addr <= d_end))
		return 1;

	return (addr <= fw_dump.boot_mem_top);
}

int should_fadump_crash(void)
{
	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
		return 0;
	return 1;
}

int is_fadump_active(void)
{
	return fw_dump.dump_active;
}

/*
 * Returns true, if there are no holes in memory area between d_start to d_end,
 * false otherwise.
 */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
	phys_addr_t reg_start, reg_end;
	bool ret = false;
	u64 i, start, end;

	for_each_mem_range(i, &reg_start, &reg_end) {
		start = max_t(u64, d_start, reg_start);
		end = min_t(u64, d_end, reg_end);
		if (d_start < end) {
			/* Memory hole from d_start to start */
			if (start > d_start)
				break;

			if (end == d_end) {
				ret = true;
				break;
			}

			d_start = end + 1;
		}
	}

	return ret;
}

/*
 * Returns true, if there are no holes in reserved memory area,
 * false otherwise.
 */
bool is_fadump_reserved_mem_contiguous(void)
{
	u64 d_start, d_end;

	d_start	= fw_dump.reserve_dump_area_start;
	d_end	= d_start + fw_dump.reserve_dump_area_size;
	return is_fadump_mem_area_contiguous(d_start, d_end);
}
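
/*
 * Worked example (hypothetical memblock layout): with memory ranges
 * [0, 0x40000000) and [0x50000000, 0x80000000), checking
 * d_start = 0x30000000, d_end = 0x60000000 walks the first range up to
 * 0x40000000 and then finds the next range starting at 0x50000000, i.e.
 * a hole at [0x40000000, 0x50000000) -- so the area is not contiguous
 * and the check above returns false.
 */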

/* Print firmware assisted dump configurations for debugging purpose. */
static void __init fadump_show_config(void)
{
	int i;

	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
		 (fw_dump.fadump_supported ? "present" : "no support"));

	if (!fw_dump.fadump_supported)
		return;

	pr_debug("Fadump enabled    : %s\n",
		 (fw_dump.fadump_enabled ? "yes" : "no"));
	pr_debug("Dump Active       : %s\n",
		 (fw_dump.dump_active ? "yes" : "no"));
	pr_debug("Dump section sizes:\n");
	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
	pr_debug("    Boot memory size   : %lx\n", fw_dump.boot_memory_size);
	pr_debug("    Boot memory top    : %llx\n", fw_dump.boot_mem_top);
	pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		pr_debug("[%03d] base = %llx, size = %llx\n", i,
			 fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
	}
}

/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during early
 * boot process. This will be the size of the memory that is required for a
 * kernel to boot successfully.
 *
 * This function has been taken from phyp-assisted dump feature implementation.
 *
 * returns larger of 256MB or 5% rounded down to multiples of 256MB.
 *
 * TODO: Come up with better approach to find out more accurate memory size
 * that is required for a kernel to boot successfully.
 */
static __init u64 fadump_calculate_reserve_size(void)
{
	u64 base, size, bootmem_min;
	int ret;

	if (fw_dump.reserve_bootvar)
		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

	/*
	 * Check if the size is specified through crashkernel= cmdline
	 * option. If yes, then use that but ignore base as fadump reserves
	 * memory at a predefined offset.
	 */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base, NULL, NULL);
	if (ret == 0 && size > 0) {
		unsigned long max_size;

		if (fw_dump.reserve_bootvar)
			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

		fw_dump.reserve_bootvar = (unsigned long)size;

		/*
		 * Adjust if the boot memory size specified is above
		 * the upper limit.
		 */
		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
		if (fw_dump.reserve_bootvar > max_size) {
			fw_dump.reserve_bootvar = max_size;
			pr_info("Adjusted boot memory size to %luMB\n",
				(fw_dump.reserve_bootvar >> 20));
		}

		return fw_dump.reserve_bootvar;
	} else if (fw_dump.reserve_bootvar) {
		/*
		 * 'fadump_reserve_mem=' is being used to reserve memory
		 * for firmware-assisted dump.
		 */
		return fw_dump.reserve_bootvar;
	}

	/* divide by 20 to get 5% of value */
	size = memblock_phys_mem_size() / 20;

	/* round it down in multiples of 256 */
	size = size & ~0x0FFFFFFFUL;

	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
	if (memory_limit && size > memory_limit)
		size = memory_limit;

	bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
	return (size > bootmem_min ? size : bootmem_min);
}
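
/*
 * Worked example of the 5% heuristic above: on a 64GB system,
 * 68719476736 / 20 = 3435973836 bytes; masking with ~0x0FFFFFFF rounds
 * that down to 3221225472 (3GB, a multiple of 256MB). The final size is
 * never below the platform's fadump_get_bootmem_min() floor.
 */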

/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long __init get_fadump_area_size(void)
{
	unsigned long size = 0;

	size += fw_dump.cpu_state_data_size;
	size += fw_dump.hpte_region_size;
	/*
	 * Account for pagesize alignment of boot memory area destination address.
	 * This facilitates mmap reading of first kernel's memory.
	 */
	size = PAGE_ALIGN(size);
	size += fw_dump.boot_memory_size;
	size += sizeof(struct fadump_crash_info_header);

	/* This is to hold kernel metadata on platforms that support it */
	size += (fw_dump.ops->fadump_get_metadata_size ?
		 fw_dump.ops->fadump_get_metadata_size() : 0);
	return size;
}
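
/*
 * The components summed above, for illustration (their actual placement
 * within the reserved area is decided by the platform code):
 *   - CPU state data and HPTE region, page-aligned as a pair
 *   - a destination copy of the boot memory region(s)
 *   - one struct fadump_crash_info_header
 *   - optional platform kernel metadata (fadump_get_metadata_size())
 */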

static int __init add_boot_mem_region(unsigned long rstart,
				      unsigned long rsize)
{
	int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns();
	int i = fw_dump.boot_mem_regs_cnt++;

	if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) {
		fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns;
		return 0;
	}

	pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
		 i, rstart, (rstart + rsize));
	fw_dump.boot_mem_addr[i] = rstart;
	fw_dump.boot_mem_sz[i] = rsize;
	return 1;
}

/*
 * Firmware usually has a hard limit on the data it can copy per region.
 * Honour that by splitting a memory range into multiple regions.
 */
static int __init add_boot_mem_regions(unsigned long mstart,
				       unsigned long msize)
{
	unsigned long rstart, rsize, max_size;
	int ret = 1;

	rstart = mstart;
	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
	while (msize) {
		if (msize > max_size)
			rsize = max_size;
		else
			rsize = msize;

		ret = add_boot_mem_region(rstart, rsize);
		if (!ret)
			break;

		msize -= rsize;
		rstart += rsize;
	}

	return ret;
}
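
/*
 * Worked example: with fw_dump.max_copy_size = 256MB (a hypothetical
 * firmware limit), add_boot_mem_regions(0, 640MB) emits three regions:
 * [0, 256MB), [256MB, 512MB) and [512MB, 640MB).
 */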

static int __init fadump_get_boot_mem_regions(void)
{
	unsigned long size, cur_size, hole_size, last_end;
	unsigned long mem_size = fw_dump.boot_memory_size;
	phys_addr_t reg_start, reg_end;
	int ret = 1;
	u64 i;

	fw_dump.boot_mem_regs_cnt = 0;

	last_end = 0;
	hole_size = 0;
	cur_size = 0;
	for_each_mem_range(i, &reg_start, &reg_end) {
		size = reg_end - reg_start;
		hole_size += (reg_start - last_end);

		if ((cur_size + size) >= mem_size) {
			size = (mem_size - cur_size);
			ret = add_boot_mem_regions(reg_start, size);
			break;
		}

		mem_size -= size;
		cur_size += size;
		ret = add_boot_mem_regions(reg_start, size);
		if (!ret)
			break;

		last_end = reg_end;
	}
	fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);

	return ret;
}

/*
 * Returns true, if the given range overlaps with reserved memory ranges
 * starting at idx. Also, updates idx to index of overlapping memory range
 * with the given memory range.
 * False, otherwise.
 */
static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
	bool ret = false;
	int i;

	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;

		if (end <= rbase)
			break;

		if ((end > rbase) && (base < rend)) {
			*idx = i;
			ret = true;
			break;
		}
	}

	return ret;
}

/*
 * Locate a suitable memory area to reserve memory for FADump. While at it,
 * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
 */
static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
{
	struct fadump_memory_range *mrngs;
	phys_addr_t mstart, mend;
	int idx = 0;
	u64 i, ret = 0;

	mrngs = reserved_mrange_info.mem_ranges;
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&mstart, &mend, NULL) {
		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
			 i, mstart, mend, base);

		if (mstart > base)
			base = PAGE_ALIGN(mstart);

		while ((mend > base) && ((mend - base) >= size)) {
			if (!overlaps_reserved_ranges(base, base + size, &idx)) {
				ret = base;
				goto out;
			}

			base = mrngs[idx].base + mrngs[idx].size;
			base = PAGE_ALIGN(base);
		}
	}

out:
	return ret;
}
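
/*
 * For illustration: if a free memblock range is [1GB, 3GB) and the
 * firmware's reserved-ranges list contains [1.2GB, 1.6GB), a request for
 * a 512MB chunk first tries base = 1GB, hits the overlap, and retries
 * from PAGE_ALIGN(1.6GB), which succeeds since 3GB - 1.6GB >= 512MB
 * (values hypothetical).
 */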

int __init fadump_reserve_mem(void)
{
	u64 base, size, mem_boundary, bootmem_min;
	int ret = 1;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
		goto error_out;
	}

	/*
	 * Initialize boot memory size
	 * If dump is active then we have already calculated the size during
	 * first kernel.
	 */
	if (!fw_dump.dump_active) {
		fw_dump.boot_memory_size =
			PAGE_ALIGN(fadump_calculate_reserve_size());

		bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
		if (fw_dump.boot_memory_size < bootmem_min) {
			pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
			       fw_dump.boot_memory_size, bootmem_min);
			goto error_out;
		}

		if (!fadump_get_boot_mem_regions()) {
			pr_err("Too many holes in boot memory area to enable fadump\n");
			goto error_out;
		}
	}

	if (memory_limit)
		mem_boundary = memory_limit;
	else
		mem_boundary = memblock_end_of_DRAM();

	base = fw_dump.boot_mem_top;
	size = get_fadump_area_size();
	fw_dump.reserve_dump_area_size = size;
	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * FADump capture kernel doesn't care much about hugepages.
		 * In fact, handling hugepages in capture kernel is asking for
		 * trouble. So, disable HugeTLB support when fadump is active.
		 */
		hugetlb_disabled = true;
#endif

		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * can be released for general use by invalidating fadump.
		 */
		fadump_reserve_crash_area(base);

		pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
		pr_debug("Reserve dump area start address: 0x%lx\n",
			 fw_dump.reserve_dump_area_start);
	} else {
		/*
		 * Reserve memory at an offset closer to bottom of the RAM to
		 * minimize the impact of memory hot-remove operation.
		 */
		base = fadump_locate_reserve_mem(base, size);

		if (!base || (base + size > mem_boundary)) {
			pr_err("Failed to find memory chunk for reservation!\n");
			goto error_out;
		}
		fw_dump.reserve_dump_area_start = base;

		/*
		 * Calculate the kernel metadata address and register it with
		 * f/w if the platform supports.
		 */
		if (fw_dump.ops->fadump_setup_metadata &&
		    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
			goto error_out;

		if (memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory!\n");
			goto error_out;
		}

		pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
			(size >> 20), base, (memblock_phys_mem_size() >> 20));
	}

	return ret;
error_out:
	fw_dump.fadump_enabled = 0;
	fw_dump.reserve_dump_area_size = 0;
	return 0;
}

/* Look for fadump= cmdline option. */
static int __init early_fadump_param(char *p)
{
	if (!p)
		return 1;

	if (strncmp(p, "on", 2) == 0)
		fw_dump.fadump_enabled = 1;
	else if (strncmp(p, "off", 3) == 0)
		fw_dump.fadump_enabled = 0;
	else if (strncmp(p, "nocma", 5) == 0) {
		fw_dump.fadump_enabled = 1;
		fw_dump.nocma = 1;
	}

	return 0;
}
early_param("fadump", early_fadump_param);

/*
 * Look for fadump_reserve_mem= cmdline option
 * TODO: Remove references to 'fadump_reserve_mem=' parameter,
 *       once the 'crashkernel=' parameter is fully adopted.
 */
static int __init early_fadump_reserve_mem(char *p)
{
	if (p)
		fw_dump.reserve_bootvar = memparse(p, &p);
	return 0;
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
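
/*
 * Example kernel command lines accepted by the two early_param hooks
 * above (values illustrative):
 *
 *   fadump=on crashkernel=4G        - enable fadump with 4GB boot memory
 *   fadump=nocma                    - enable fadump, skip the CMA path
 *   fadump=on fadump_reserve_mem=2G - deprecated spelling of the above
 */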

void crash_fadump(struct pt_regs *regs, const char *str)
{
	unsigned int msecs;
	struct fadump_crash_info_header *fdh = NULL;
	int old_cpu, this_cpu;
	/* Do not include first CPU */
	unsigned int ncpus = num_online_cpus() - 1;

	if (!should_fadump_crash())
		return;

	/*
	 * old_cpu == -1 means this is the first CPU which has come here,
	 * go ahead and trigger fadump.
	 *
	 * old_cpu != -1 means some other CPU is already on its way
	 * to trigger fadump, just keep looping here.
	 */
	this_cpu = smp_processor_id();
	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);

	if (old_cpu != -1) {
		atomic_inc(&cpus_in_fadump);

		/*
		 * We can't loop here indefinitely. Wait as long as fadump
		 * is in force. If we race with fadump un-registration this
		 * loop will break and then we go down to normal panic path
		 * and reboot. If fadump is in force the first crashing
		 * cpu will definitely trigger fadump.
		 */
		while (fw_dump.dump_registered)
			cpu_relax();
		return;
	}

	fdh = __va(fw_dump.fadumphdr_addr);
	fdh->crashing_cpu = crashing_cpu;
	crash_save_vmcoreinfo();

	if (regs)
		fdh->regs = *regs;
	else
		ppc_save_regs(&fdh->regs);

	fdh->cpu_mask = *cpu_online_mask;

	/*
	 * If we came in via system reset, wait a while for the secondary
	 * CPUs to enter.
	 */
	if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
		msecs = CRASH_TIMEOUT;
		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
			mdelay(1);
	}

	fw_dump.ops->fadump_trigger(fdh, str);
}
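
/*
 * The cmpxchg() election above, in miniature: CPUs A and B crash
 * concurrently; both run cmpxchg(&crashing_cpu, -1, this_cpu). Exactly
 * one swap succeeds (old_cpu == -1) and that CPU triggers fadump; the
 * loser sees old_cpu != -1, bumps cpus_in_fadump and spins until the
 * registration is torn down or firmware takes over.
 */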

u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
	struct elf_prstatus prstatus;

	memset(&prstatus, 0, sizeof(prstatus));
	/*
	 * FIXME: How do I get PID? Do I really need it?
	 * prstatus.pr_pid = ????
	 */
	elf_core_copy_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	return buf;
}

void __init fadump_update_elfcore_header(char *bufp)
{
	struct elf_phdr *phdr;

	bufp += sizeof(struct elfhdr);

	/* First note is a place holder for cpu notes info. */
	phdr = (struct elf_phdr *)bufp;

	if (phdr->p_type == PT_NOTE) {
		phdr->p_paddr	= __pa(fw_dump.cpu_notes_buf_vaddr);
		phdr->p_offset	= phdr->p_paddr;
		phdr->p_filesz	= fw_dump.cpu_notes_buf_size;
		phdr->p_memsz	= fw_dump.cpu_notes_buf_size;
	}
}

static void *__init fadump_alloc_buffer(unsigned long size)
{
	unsigned long count, i;
	struct page *page;
	void *vaddr;

	vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
	if (!vaddr)
		return NULL;

	count = PAGE_ALIGN(size) / PAGE_SIZE;
	page = virt_to_page(vaddr);
	for (i = 0; i < count; i++)
		mark_page_reserved(page + i);
	return vaddr;
}

static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}

s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
	/* Allocate buffer to hold cpu crash notes. */
	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr =
		(unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
	if (!fw_dump.cpu_notes_buf_vaddr) {
		pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
		       fw_dump.cpu_notes_buf_size);
		return -ENOMEM;
	}

	pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
		 fw_dump.cpu_notes_buf_size,
		 fw_dump.cpu_notes_buf_vaddr);
	return 0;
}

void fadump_free_cpu_notes_buf(void)
{
	if (!fw_dump.cpu_notes_buf_vaddr)
		return;

	fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
			   fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf_vaddr = 0;
	fw_dump.cpu_notes_buf_size = 0;
}

static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	if (mrange_info->is_static) {
		mrange_info->mem_range_cnt = 0;
		return;
	}

	kfree(mrange_info->mem_ranges);
	memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
	       (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}

/*
 * Allocate or reallocate mem_ranges array in incremental units
 * of PAGE_SIZE.
 */
static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *new_array;
	u64 new_size;

	new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
	pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
		 new_size, mrange_info->name);

	new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
	if (new_array == NULL) {
		pr_err("Insufficient memory for setting up %s memory ranges\n",
		       mrange_info->name);
		fadump_free_mem_ranges(mrange_info);
		return -ENOMEM;
	}

	mrange_info->mem_ranges = new_array;
	mrange_info->mem_ranges_sz = new_size;
	mrange_info->max_mem_ranges = (new_size /
				       sizeof(struct fadump_memory_range));
	return 0;
}

static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
				       u64 base, u64 end)
{
	struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
	bool is_adjacent = false;
	u64 start, size;

	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (mrange_info->mem_range_cnt) {
		start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
		size  = mem_ranges[mrange_info->mem_range_cnt - 1].size;

		/*
		 * Boot memory area needs separate PT_LOAD segment(s) as it
		 * is moved to a different location at the time of crash.
		 * So, fold only if the region is not boot memory area.
		 */
		if ((start + size) == base && start >= fw_dump.boot_mem_top)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
			int ret;

			if (mrange_info->is_static) {
				pr_err("Reached array size limit for %s memory ranges\n",
				       mrange_info->name);
				return -ENOSPC;
			}

			ret = fadump_alloc_mem_ranges(mrange_info);
			if (ret)
				return ret;

			/* Update to the new resized array */
			mem_ranges = mrange_info->mem_ranges;
		}

		start = base;
		mem_ranges[mrange_info->mem_range_cnt].base = start;
		mrange_info->mem_range_cnt++;
	}

	mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
	pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		 mrange_info->name, (mrange_info->mem_range_cnt - 1),
		 start, end - 1, (end - start));
	return 0;
}

static int fadump_init_elfcore_header(char *bufp)
{
	struct elfhdr *elf;

	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;

	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
		elf->e_flags = 2;
	else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
		elf->e_flags = 1;
	else
		elf->e_flags = 0;

	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = 0;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;

	return 0;
}

/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
	unsigned long raddr, rstart, rend, rlast, hole_size;
	int i;

	hole_size = 0;
	rlast = 0;
	raddr = paddr;
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		rstart = fw_dump.boot_mem_addr[i];
		rend = rstart + fw_dump.boot_mem_sz[i];
		hole_size += (rstart - rlast);

		if (paddr >= rstart && paddr < rend) {
			raddr += fw_dump.boot_mem_dest_addr - hole_size;
			break;
		}

		rlast = rend;
	}

	pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
	return raddr;
}
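
/*
 * Relocation example (hypothetical layout): with one boot memory region
 * [0x0, 0x40000000) copied to boot_mem_dest_addr = 0x80000000, a paddr
 * of 0x1000 inside that region relocates to 0x80001000; an address
 * outside every boot memory region is returned unchanged.
 */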

static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start,
					u64 size, unsigned long long offset)
{
	phdr->p_align	= 0;
	phdr->p_memsz	= size;
	phdr->p_filesz	= size;
	phdr->p_paddr	= start;
	phdr->p_offset	= offset;
	phdr->p_type	= PT_LOAD;
	phdr->p_flags	= PF_R|PF_W|PF_X;
	phdr->p_vaddr	= (unsigned long)__va(start);
}

static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh)
{
	char *bufp;
	struct elfhdr *elf;
	struct elf_phdr *phdr;
	u64 boot_mem_dest_offset;
	unsigned long long i, ra_start, ra_end, ra_size, mstart, mend;

	bufp = (char *) fw_dump.elfcorehdr_addr;
	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * Set up ELF PT_NOTE, a placeholder for CPU notes information.
	 * The notes info will be populated later by platform-specific code.
	 * Hence, this PT_NOTE will always be the first ELF note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;
	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;
	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type	= PT_NOTE;
	phdr->p_flags	= 0;
	phdr->p_vaddr	= 0;
	phdr->p_align	= 0;
	phdr->p_paddr	= phdr->p_offset = fdh->vmcoreinfo_raddr;
	phdr->p_memsz	= phdr->p_filesz = fdh->vmcoreinfo_size;
	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/*
	 * Setup PT_LOAD sections. first include boot memory regions
	 * and then add rest of the memory regions.
	 */
	boot_mem_dest_offset = fw_dump.boot_mem_dest_addr;
	for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i],
				     fw_dump.boot_mem_sz[i],
				     boot_mem_dest_offset);
		/* Increment number of program headers. */
		(elf->e_phnum)++;
		boot_mem_dest_offset += fw_dump.boot_mem_sz[i];
	}

	/* Memory reserved for fadump in first kernel */
	ra_start = fw_dump.reserve_dump_area_start;
	ra_size = get_fadump_area_size();
	ra_end = ra_start + ra_size;

	phdr = (struct elf_phdr *)bufp;
	for_each_mem_range(i, &mstart, &mend) {
		/* Boot memory regions already added, skip them now */
		if (mstart < fw_dump.boot_mem_top) {
			if (mend > fw_dump.boot_mem_top)
				mstart = fw_dump.boot_mem_top;
			else
				continue;
		}

		/* Handle memblock regions overlaps with fadump reserved area */
		if ((ra_start < mend) && (ra_end > mstart)) {
			if ((mstart < ra_start) && (mend > ra_end)) {
				populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
				/* Increment number of program headers. */
				(elf->e_phnum)++;
				bufp += sizeof(struct elf_phdr);
				phdr = (struct elf_phdr *)bufp;
				populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
			} else if (mstart < ra_start) {
				populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
			} else if (ra_end < mend) {
				populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
			} else
				continue;
		} else {
			/* No overlap with fadump reserved memory region */
			populate_elf_pt_load(phdr, mstart, mend - mstart, mstart);
		}

		/* Increment number of program headers. */
		(elf->e_phnum)++;
		bufp += sizeof(struct elf_phdr);
		phdr = (struct elf_phdr *) bufp;
	}
}

static unsigned long init_fadump_header(unsigned long addr)
{
	struct fadump_crash_info_header *fdh;

	if (!addr)
		return 0;

	fdh = __va(addr);
	addr += sizeof(struct fadump_crash_info_header);

	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
	fdh->version = FADUMP_HEADER_VERSION;
	/* We will set the crashing cpu id in crash_fadump() during crash. */
	fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;

	/*
	 * The physical address and size of vmcoreinfo are required in the
	 * second kernel to prepare elfcorehdr.
	 */
	fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note());
	fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE;

	fdh->pt_regs_sz = sizeof(struct pt_regs);
	/*
	 * When LPAR is terminated by PHYP, ensure all possible CPUs'
	 * register data is processed while exporting the vmcore.
	 */
	fdh->cpu_mask = *cpu_possible_mask;
	fdh->cpu_mask_sz = sizeof(struct cpumask);

	return addr;
}

static int register_fadump(void)
{
	unsigned long addr;

	/*
	 * If no memory is reserved then we can not register for firmware-
	 * assisted dump.
	 */
	if (!fw_dump.reserve_dump_area_size)
		return -ENODEV;

	addr = fw_dump.fadumphdr_addr;

	/* Initialize fadump crash info header. */
	addr = init_fadump_header(addr);

	/* register the future kernel dump with firmware. */
	pr_debug("Registering for firmware-assisted kernel dump...\n");
	return fw_dump.ops->fadump_register(&fw_dump);
}

void fadump_cleanup(void)
{
	if (!fw_dump.fadump_supported)
		return;

	/* Invalidate the registration only if dump is active. */
	if (fw_dump.dump_active) {
		pr_debug("Invalidating firmware-assisted dump registration\n");
		fw_dump.ops->fadump_invalidate(&fw_dump);
	} else if (fw_dump.dump_registered) {
		/* Un-register Firmware-assisted dump if it was registered. */
		fw_dump.ops->fadump_unregister(&fw_dump);
	}

	if (fw_dump.ops->fadump_cleanup)
		fw_dump.ops->fadump_cleanup(&fw_dump);
}

static void fadump_free_reserved_memory(unsigned long start_pfn,
					unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long time_limit = jiffies + HZ;

	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		free_reserved_page(pfn_to_page(pfn));

		if (time_after(jiffies, time_limit)) {
			cond_resched();
			time_limit = jiffies + HZ;
		}
	}
}

/*
 * Skip memory holes and free memory that was actually reserved.
 */
static void fadump_release_reserved_area(u64 start, u64 end)
{
	unsigned long reg_spfn, reg_epfn;
	u64 tstart, tend, spfn, epfn;
	int i;

	spfn = PHYS_PFN(start);
	epfn = PHYS_PFN(end);

	for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
		tstart = max_t(u64, spfn, reg_spfn);
		tend   = min_t(u64, epfn, reg_epfn);

		if (tstart < tend) {
			fadump_free_reserved_memory(tstart, tend);

			if (tend == epfn)
				break;

			spfn = tend;
		}
	}
}

/*
 * Sort the mem ranges in-place and merge adjacent ranges
 * to minimize the memory ranges count.
 */
static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
	struct fadump_memory_range *mem_ranges;
	u64 base, size;
	int i, j, idx;

	if (!reserved_mrange_info.mem_range_cnt)
		return;

	/* Sort the memory ranges */
	mem_ranges = mrange_info->mem_ranges;
	for (i = 0; i < mrange_info->mem_range_cnt; i++) {
		idx = i;
		for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
			if (mem_ranges[idx].base > mem_ranges[j].base)
				idx = j;
		}
		if (idx != i)
			swap(mem_ranges[idx], mem_ranges[i]);
	}

	/* Merge adjacent reserved ranges */
	idx = 0;
	for (i = 1; i < mrange_info->mem_range_cnt; i++) {
		base = mem_ranges[i-1].base;
		size = mem_ranges[i-1].size;
		if (mem_ranges[i].base == (base + size))
			mem_ranges[idx].size += mem_ranges[i].size;
		else {
			idx++;
			if (i == idx)
				continue;

			mem_ranges[idx] = mem_ranges[i];
		}
	}
	mrange_info->mem_range_cnt = idx + 1;
}
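
/*
 * Worked example: ranges arrive as {[30,40), [0,10), [10,20)} (units
 * arbitrary). The selection sort orders them to {[0,10), [10,20),
 * [30,40)} and the merge pass folds the first two into [0,20), leaving
 * mem_range_cnt = 2: {[0,20), [30,40)}.
 */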

/*
 * Scan reserved-ranges to consider them while reserving/releasing
 * memory for FADump.
 */
static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
	const __be32 *prop;
	int len, ret = -1;
	unsigned long i;

	/* reserved-ranges already scanned */
	if (reserved_mrange_info.mem_range_cnt != 0)
		return;

	prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
	if (!prop)
		return;

	/*
	 * Each reserved range is an (address,size) pair, 2 cells each,
	 * totalling 4 cells per range.
	 */
	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
		u64 base, size;

		base = of_read_number(prop + (i * 4) + 0, 2);
		size = of_read_number(prop + (i * 4) + 2, 2);

		if (size) {
			ret = fadump_add_mem_range(&reserved_mrange_info,
						   base, base + size);
			if (ret < 0) {
				pr_warn("some reserved ranges are ignored!\n");
				break;
			}
		}
	}

	/* Compact reserved ranges */
	sort_and_merge_mem_ranges(&reserved_mrange_info);
}

/*
 * Release the memory that was reserved during early boot to preserve the
 * crash'ed kernel's memory contents except reserved dump area (permanent
 * reservation) and reserved ranges used by F/W. The released memory will
 * be available for general use.
 */
static void fadump_release_memory(u64 begin, u64 end)
{
	u64 ra_start, ra_end, tstart;
	int i, ret;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	/*
	 * If reserved ranges array limit is hit, overwrite the last reserved
	 * memory range with reserved dump area to ensure it is excluded from
	 * the memory being released (reused for next FADump registration).
	 */
	if (reserved_mrange_info.mem_range_cnt ==
	    reserved_mrange_info.max_mem_ranges)
		reserved_mrange_info.mem_range_cnt--;

	ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
	if (ret != 0)
		return;

	/* Get the reserved ranges list in order first. */
	sort_and_merge_mem_ranges(&reserved_mrange_info);

	/* Exclude reserved ranges and release remaining memory */
	tstart = begin;
	for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
		ra_start = reserved_mrange_info.mem_ranges[i].base;
		ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;

		if (tstart >= ra_end)
			continue;

		if (tstart < ra_start)
			fadump_release_reserved_area(tstart, ra_start);
		tstart = ra_end;
	}

	if (tstart < end)
		fadump_release_reserved_area(tstart, end);
}

static void fadump_free_elfcorehdr_buf(void)
{
	if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0)
		return;

	/*
	 * Before freeing the memory of `elfcorehdr`, reset the global
	 * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing
	 * invalid memory.
	 */
	elfcorehdr_addr = ELFCORE_ADDR_ERR;
	fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size);
	fw_dump.elfcorehdr_addr = 0;
	fw_dump.elfcorehdr_size = 0;
}

static void fadump_invalidate_release_mem(void)
{
	mutex_lock(&fadump_mutex);
	if (!fw_dump.dump_active) {
		mutex_unlock(&fadump_mutex);
		return;
	}

	fadump_cleanup();
	mutex_unlock(&fadump_mutex);

	fadump_free_elfcorehdr_buf();
	fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
	fadump_free_cpu_notes_buf();

	/*
	 * Setup kernel metadata and initialize the kernel dump
	 * memory structure for FADump re-registration.
	 */
	if (fw_dump.ops->fadump_setup_metadata &&
	    (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
		pr_warn("Failed to setup kernel metadata!\n");
	fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}

static ssize_t release_mem_store(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 const char *buf, size_t count)
{
	int input = -1;

	if (!fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	if (input == 1) {
		/*
		 * Take away the '/proc/vmcore'. We are releasing the dump
		 * memory, hence it will not be valid anymore.
		 */
#ifdef CONFIG_PROC_VMCORE
		vmcore_cleanup();
#endif
		fadump_invalidate_release_mem();

	} else
		return -EINVAL;

	return count;
}

/* Release the reserved memory and disable the FADump */
static void __init unregister_fadump(void)
{
	fadump_cleanup();
	fadump_release_memory(fw_dump.reserve_dump_area_start,
			      fw_dump.reserve_dump_area_size);
	fw_dump.fadump_enabled = 0;
	kobject_put(fadump_kobj);
}

static ssize_t enabled_show(struct kobject *kobj,
			    struct kobj_attribute *attr,
			    char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}

/*
 * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which indicates
 * to userspace that fadump re-registration is not required on memory
 * hotplug events.
 */
static ssize_t hotplug_ready_show(struct kobject *kobj,
				  struct kobj_attribute *attr,
				  char *buf)
{
	return sprintf(buf, "%d\n", 1);
}

static ssize_t mem_reserved_show(struct kobject *kobj,
				 struct kobj_attribute *attr,
				 char *buf)
{
	return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
}

static ssize_t registered_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.dump_registered);
}

static ssize_t bootargs_append_show(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    char *buf)
{
	return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area));
}

static ssize_t bootargs_append_store(struct kobject *kobj,
				     struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	char *params;

	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
		return -EPERM;

	if (count >= COMMAND_LINE_SIZE)
		return -EINVAL;

	/*
	 * Fail here instead of handling this scenario with
	 * some silly workaround in capture kernel.
	 */
	if (saved_command_line_len + count >= COMMAND_LINE_SIZE) {
		pr_err("Appending parameters exceeds cmdline size!\n");
		return -ENOSPC;
	}

	params = __va(fw_dump.param_area);
	strscpy_pad(params, buf, COMMAND_LINE_SIZE);
	/* Remove newline character at the end. */
	if (params[count-1] == '\n')
		params[count-1] = '\0';

	return count;
}
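
/*
 * Example (path as created by this file; value illustrative):
 *
 *   echo "nr_cpus=1" > /sys/kernel/fadump/bootargs_append
 *
 * stores the string in the parameter area so that a subsequent capture
 * kernel boot appends it to its command line via fadump_append_bootargs().
 */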

static ssize_t registered_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t count)
{
	int ret = 0;
	int input = -1;

	if (!fw_dump.fadump_enabled || fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	mutex_lock(&fadump_mutex);

	switch (input) {
	case 0:
		if (fw_dump.dump_registered == 0) {
			goto unlock_out;
		}

		/* Un-register Firmware-assisted dump */
		pr_debug("Un-register firmware-assisted dump\n");
		fw_dump.ops->fadump_unregister(&fw_dump);
		break;
	case 1:
		if (fw_dump.dump_registered == 1) {
			/* Un-register Firmware-assisted dump */
			fw_dump.ops->fadump_unregister(&fw_dump);
		}

		/* Register Firmware-assisted dump */
		ret = register_fadump();
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock_out:
	mutex_unlock(&fadump_mutex);
	return ret < 0 ? ret : count;
}

static int fadump_region_show(struct seq_file *m, void *private)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	mutex_lock(&fadump_mutex);
	fw_dump.ops->fadump_region_show(&fw_dump, m);
	mutex_unlock(&fadump_mutex);
	return 0;
}

static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
static struct kobj_attribute register_attr = __ATTR_RW(registered);
static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready);
static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append);

static struct attribute *fadump_attrs[] = {
	&enable_attr.attr,
	&register_attr.attr,
	&mem_reserved_attr.attr,
	&hotplug_ready_attr.attr,
	NULL,
};

ATTRIBUTE_GROUPS(fadump);

DEFINE_SHOW_ATTRIBUTE(fadump_region);
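
/*
 * Typical userspace interaction with the nodes defined above (a sketch;
 * all files live under /sys/kernel/fadump/):
 *
 *   cat enabled            # 1 if fadump=on took effect
 *   echo 1 > registered    # (re)register for a dump on the next crash
 *   cat mem_reserved       # bytes reserved for fadump
 *   echo 1 > release_mem   # in capture kernel: free the preserved memory
 */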

static void __init fadump_init_files(void)
{
	int rc = 0;

	fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
	if (!fadump_kobj) {
		pr_err("failed to create fadump kobject\n");
		return;
	}

	if (fw_dump.param_area) {
		rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
		if (rc)
			pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
	}

	debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
			    &fadump_region_fops);

	if (fw_dump.dump_active) {
		rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
		if (rc)
			pr_err("unable to create release_mem sysfs file (%d)\n",
			       rc);
	}

	rc = sysfs_create_groups(fadump_kobj, fadump_groups);
	if (rc) {
		pr_err("sysfs group creation failed (%d), unregistering FADump",
		       rc);
		unregister_fadump();
		return;
	}

	/*
	 * The FADump sysfs nodes are moved from kernel_kobj to fadump_kobj;
	 * create symlinks at the old location to maintain backward
	 * compatibility.
	 *
	 *      - fadump_enabled -> fadump/enabled
	 *      - fadump_registered -> fadump/registered
	 *      - fadump_release_mem -> fadump/release_mem
	 */
	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "enabled", "fadump_enabled");
	if (rc) {
		pr_err("unable to create fadump_enabled symlink (%d)", rc);
		return;
	}

	rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
						  "registered",
						  "fadump_registered");
	if (rc) {
		pr_err("unable to create fadump_registered symlink (%d)", rc);
		sysfs_remove_link(kernel_kobj, "fadump_enabled");
		return;
	}

	if (fw_dump.dump_active) {
		rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
							  fadump_kobj,
							  "release_mem",
							  "fadump_release_mem");
		if (rc)
			pr_err("unable to create fadump_release_mem symlink (%d)",
			       rc);
	}
}

static int __init fadump_setup_elfcorehdr_buf(void)
{
	int elf_phdr_cnt;
	unsigned long elfcorehdr_size;

	/*
	 * Program header for CPU notes comes first, followed by one for
	 * vmcoreinfo, and the remaining program headers correspond to
	 * memory regions.
	 */
	elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory);
	elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr));
	elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size);

	fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size);
	if (!fw_dump.elfcorehdr_addr) {
		pr_err("Failed to allocate %lu bytes for elfcorehdr\n",
		       elfcorehdr_size);
		return -ENOMEM;
	}
	fw_dump.elfcorehdr_size = elfcorehdr_size;
	return 0;
}

/*
 * Check if the fadump header of crashed kernel is compatible with fadump kernel.
 *
 * It checks the magic number, endianness, and size of non-primitive type
 * members of fadump header to ensure safe dump collection.
 */
static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh)
{
	if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) {
		pr_err("Old magic number, can't process the dump.\n");
		return false;
	}

	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
		if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC))
			pr_err("Endianness mismatch between the crashed and fadump kernels.\n");
		else
			pr_err("Fadump header is corrupted.\n");

		return false;
	}

	/*
	 * Dump collection is not safe if the sizes of non-primitive type
	 * members of the fadump header do not match between crashed and
	 * fadump kernel.
	 */
	if (fdh->pt_regs_sz != sizeof(struct pt_regs) ||
	    fdh->cpu_mask_sz != sizeof(struct cpumask)) {
		pr_err("Fadump header size mismatch.\n");
		return false;
	}

	return true;
}

static void __init fadump_process(void)
{
	struct fadump_crash_info_header *fdh;

	fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr);
	if (!fdh) {
		pr_err("Crash info header is empty.\n");
		goto err_out;
	}

	/* Avoid processing the dump if fadump header isn't compatible */
	if (!is_fadump_header_compatible(fdh))
		goto err_out;

	/* Allocate buffer for elfcorehdr */
	if (fadump_setup_elfcorehdr_buf())
		goto err_out;

	fadump_populate_elfcorehdr(fdh);

	/* Let platform update the CPU notes in elfcorehdr */
	if (fw_dump.ops->fadump_process(&fw_dump) < 0)
		goto err_out;

	/*
	 * elfcorehdr is now ready to be exported.
	 *
	 * set elfcorehdr_addr so that vmcore module will export the
	 * elfcorehdr through '/proc/vmcore'.
	 */
	elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr);
	return;

err_out:
	fadump_invalidate_release_mem();
}

/*
 * Reserve memory to store additional parameters to be passed
 * for fadump/capture kernel.
 */
void __init fadump_setup_param_area(void)
{
	phys_addr_t range_start, range_end;

	if (!fw_dump.param_area_supported || fw_dump.dump_active)
		return;

	/* This memory can't be used by PFW or bootloader as it is shared across kernels */
	if (early_radix_enabled()) {
		/*
		 * Anywhere in the upper half should be good enough as all memory
		 * is accessible in real mode.
		 */
		range_start = memblock_end_of_DRAM() / 2;
		range_end = memblock_end_of_DRAM();
	} else {
		/*
		 * Passing additional parameters is supported for hash MMU only
		 * if the first memory block size is 768MB or higher.
		 */
		if (ppc64_rma_size < 0x30000000)
			return;

		/*
		 * 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving
		 * memory for passing additional parameters in this range to avoid
		 * being stomped on by PFW/bootloader.
		 */
		range_start = 0x2A000000;
		range_end = range_start + 0x4000000;
	}

	fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE,
						       COMMAND_LINE_SIZE,
						       range_start,
						       range_end);
	if (!fw_dump.param_area) {
		pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
		return;
	}

	memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE);
}

/*
 * Prepare for firmware-assisted dump.
 */
int __init setup_fadump(void)
{
	if (!fw_dump.fadump_supported)
		return 0;

	fadump_init_files();
	fadump_show_config();

	if (!fw_dump.fadump_enabled)
		return 1;

	/*
	 * If dump data is available then see if it is valid and prepare for
	 * saving it to the disk.
	 */
	if (fw_dump.dump_active) {
		fadump_process();
	}
	/* Initialize the kernel dump memory structure and register with f/w */
	else if (fw_dump.reserve_dump_area_size) {
		fw_dump.ops->fadump_init_mem_struct(&fw_dump);
		register_fadump();
	}

	/*
	 * In case of panic, fadump is triggered via ppc_panic_event()
	 * panic notifier. Setting crash_kexec_post_notifiers to 'true'
	 * lets panic() function take crash friendly path before panic
	 * notifiers are invoked.
	 */
	crash_kexec_post_notifiers = true;

	return 1;
}
/*
 * Use subsys_initcall_sync() here because there is dependency with
 * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo
 * initialization is done before registering with f/w.
 */
subsys_initcall_sync(setup_fadump);
#else /* !CONFIG_PRESERVE_FA_DUMP */

/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
				      int depth, void *data)
{
	if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
		return 0;

	opal_fadump_dt_scan(&fw_dump, node);
	return 1;
}

/*
 * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
 * preserve crash data. The subsequent memory preserving kernel boot
 * is likely to process this crash data.
 */
int __init fadump_reserve_mem(void)
{
	if (fw_dump.dump_active) {
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot memory to preserve crash data.
		 */
		pr_info("Preserving crash data for processing in next boot.\n");
		fadump_reserve_crash_area(fw_dump.boot_mem_top);
	} else
		pr_debug("FADump-aware kernel..\n");

	return 1;
}
#endif /* CONFIG_PRESERVE_FA_DUMP */

/* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base)
{
	u64 i, mstart, mend, msize;

	for_each_mem_range(i, &mstart, &mend) {
		msize = mend - mstart;

		if ((mstart + msize) < base)
			continue;

		if (mstart < base) {
			msize	-= (base - mstart);
			mstart	 = base;
		}

		pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data\n",
			(msize >> 20), mstart);
		memblock_reserve(mstart, msize);
	}
}