1 // SPDX-License-Identifier: GPL-2.0
3 * S390 kdump implementation
5 * Copyright IBM Corp. 2011
6 * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
9 #include <linux/crash_dump.h>
10 #include <asm/lowcore.h>
11 #include <linux/kernel.h>
12 #include <linux/init.h>
14 #include <linux/gfp.h>
15 #include <linux/slab.h>
16 #include <linux/memblock.h>
17 #include <linux/elf.h>
18 #include <asm/asm-offsets.h>
19 #include <asm/os_info.h>
/*
 * Byte-granular pointer helpers.
 *
 * PTR_ADD(x, y)  - char pointer that lies y bytes behind x
 * PTR_SUB(x, y)  - char pointer that lies y bytes in front of x
 * PTR_DIFF(x, y) - distance in bytes from y to x as unsigned long
 */
#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
/*
 * Subtract as integers: moving a char pointer back by the complete numeric
 * value of y would form a pointer far outside of the object x points into.
 */
#define PTR_DIFF(x, y) (((unsigned long) (x)) - ((unsigned long) (y)))
28 static struct memblock_region oldmem_region
;
30 static struct memblock_type oldmem_type
= {
34 .regions
= &oldmem_region
,
39 struct list_head list
;
51 __vector128 vxrs_high
[16];
54 static LIST_HEAD(dump_save_areas
);
57 * Allocate a save area
59 struct save_area
* __init
save_area_alloc(bool is_boot_cpu
)
63 sa
= (void *) memblock_phys_alloc(sizeof(*sa
), 8);
65 panic("Failed to allocate save area\n");
68 list_add(&sa
->list
, &dump_save_areas
);
70 list_add_tail(&sa
->list
, &dump_save_areas
);
75 * Return the address of the save area for the boot CPU
77 struct save_area
* __init
save_area_boot_cpu(void)
79 return list_first_entry_or_null(&dump_save_areas
, struct save_area
, list
);
83 * Copy CPU registers into the save area
85 void __init
save_area_add_regs(struct save_area
*sa
, void *regs
)
89 lc
= (struct lowcore
*)(regs
- __LC_FPREGS_SAVE_AREA
);
90 memcpy(&sa
->psw
, &lc
->psw_save_area
, sizeof(sa
->psw
));
91 memcpy(&sa
->ctrs
, &lc
->cregs_save_area
, sizeof(sa
->ctrs
));
92 memcpy(&sa
->gprs
, &lc
->gpregs_save_area
, sizeof(sa
->gprs
));
93 memcpy(&sa
->acrs
, &lc
->access_regs_save_area
, sizeof(sa
->acrs
));
94 memcpy(&sa
->fprs
, &lc
->floating_pt_save_area
, sizeof(sa
->fprs
));
95 memcpy(&sa
->fpc
, &lc
->fpt_creg_save_area
, sizeof(sa
->fpc
));
96 memcpy(&sa
->prefix
, &lc
->prefixreg_save_area
, sizeof(sa
->prefix
));
97 memcpy(&sa
->todpreg
, &lc
->tod_progreg_save_area
, sizeof(sa
->todpreg
));
98 memcpy(&sa
->timer
, &lc
->cpu_timer_save_area
, sizeof(sa
->timer
));
99 memcpy(&sa
->todcmp
, &lc
->clock_comp_save_area
, sizeof(sa
->todcmp
));
103 * Copy vector registers into the save area
105 void __init
save_area_add_vxrs(struct save_area
*sa
, __vector128
*vxrs
)
109 /* Copy lower halves of vector registers 0-15 */
110 for (i
= 0; i
< 16; i
++)
111 memcpy(&sa
->vxrs_low
[i
], &vxrs
[i
].u
[2], 8);
112 /* Copy vector registers 16-31 */
113 memcpy(sa
->vxrs_high
, vxrs
+ 16, 16 * sizeof(__vector128
));
117 * Return physical address for virtual address
119 static inline void *load_real_addr(void *addr
)
121 unsigned long real_addr
;
128 : "=a" (real_addr
) : "a" (addr
) : "cc");
129 return (void *)real_addr
;
133 * Copy memory of the old, dumped system to a kernel space virtual address
135 int copy_oldmem_kernel(void *dst
, void *src
, size_t count
)
137 unsigned long from
, len
;
143 if (!OLDMEM_BASE
&& from
< sclp
.hsa_size
) {
144 /* Copy from zfcp/nvme dump HSA area */
145 len
= min(count
, sclp
.hsa_size
- from
);
146 rc
= memcpy_hsa_kernel(dst
, from
, len
);
150 /* Check for swapped kdump oldmem areas */
151 if (OLDMEM_BASE
&& from
- OLDMEM_BASE
< OLDMEM_SIZE
) {
153 len
= min(count
, OLDMEM_SIZE
- from
);
154 } else if (OLDMEM_BASE
&& from
< OLDMEM_SIZE
) {
155 len
= min(count
, OLDMEM_SIZE
- from
);
160 if (is_vmalloc_or_module_addr(dst
)) {
161 ra
= load_real_addr(dst
);
162 len
= min(PAGE_SIZE
- offset_in_page(ra
), len
);
166 if (memcpy_real(ra
, (void *) from
, len
))
177 * Copy memory of the old, dumped system to a user space virtual address
179 static int copy_oldmem_user(void __user
*dst
, void *src
, size_t count
)
181 unsigned long from
, len
;
186 if (!OLDMEM_BASE
&& from
< sclp
.hsa_size
) {
187 /* Copy from zfcp/nvme dump HSA area */
188 len
= min(count
, sclp
.hsa_size
- from
);
189 rc
= memcpy_hsa_user(dst
, from
, len
);
193 /* Check for swapped kdump oldmem areas */
194 if (OLDMEM_BASE
&& from
- OLDMEM_BASE
< OLDMEM_SIZE
) {
196 len
= min(count
, OLDMEM_SIZE
- from
);
197 } else if (OLDMEM_BASE
&& from
< OLDMEM_SIZE
) {
198 len
= min(count
, OLDMEM_SIZE
- from
);
203 rc
= copy_to_user_real(dst
, (void *) from
, count
);
215 * Copy one page from "oldmem"
217 ssize_t
copy_oldmem_page(unsigned long pfn
, char *buf
, size_t csize
,
218 unsigned long offset
, int userbuf
)
225 src
= (void *) (pfn
<< PAGE_SHIFT
) + offset
;
227 rc
= copy_oldmem_user((void __force __user
*) buf
, src
, csize
);
229 rc
= copy_oldmem_kernel((void *) buf
, src
, csize
);
234 * Remap "oldmem" for kdump
236 * For the kdump reserved memory this function performs a swap operation:
237 * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
239 static int remap_oldmem_pfn_range_kdump(struct vm_area_struct
*vma
,
240 unsigned long from
, unsigned long pfn
,
241 unsigned long size
, pgprot_t prot
)
243 unsigned long size_old
;
246 if (pfn
< OLDMEM_SIZE
>> PAGE_SHIFT
) {
247 size_old
= min(size
, OLDMEM_SIZE
- (pfn
<< PAGE_SHIFT
));
248 rc
= remap_pfn_range(vma
, from
,
249 pfn
+ (OLDMEM_BASE
>> PAGE_SHIFT
),
251 if (rc
|| size
== size_old
)
255 pfn
+= size_old
>> PAGE_SHIFT
;
257 return remap_pfn_range(vma
, from
, pfn
, size
, prot
);
261 * Remap "oldmem" for zfcp/nvme dump
263 * We only map available memory above HSA size. Memory below HSA size
264 * is read on demand using the copy_oldmem_page() function.
266 static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct
*vma
,
269 unsigned long size
, pgprot_t prot
)
271 unsigned long hsa_end
= sclp
.hsa_size
;
272 unsigned long size_hsa
;
274 if (pfn
< hsa_end
>> PAGE_SHIFT
) {
275 size_hsa
= min(size
, hsa_end
- (pfn
<< PAGE_SHIFT
));
276 if (size
== size_hsa
)
280 pfn
+= size_hsa
>> PAGE_SHIFT
;
282 return remap_pfn_range(vma
, from
, pfn
, size
, prot
);
286 * Remap "oldmem" for kdump or zfcp/nvme dump
288 int remap_oldmem_pfn_range(struct vm_area_struct
*vma
, unsigned long from
,
289 unsigned long pfn
, unsigned long size
, pgprot_t prot
)
292 return remap_oldmem_pfn_range_kdump(vma
, from
, pfn
, size
, prot
);
294 return remap_oldmem_pfn_range_zfcpdump(vma
, from
, pfn
, size
,
298 static const char *nt_name(Elf64_Word type
)
300 const char *name
= "LINUX";
302 if (type
== NT_PRPSINFO
|| type
== NT_PRSTATUS
|| type
== NT_PRFPREG
)
303 name
= KEXEC_CORE_NOTE_NAME
;
308 * Initialize ELF note
310 static void *nt_init_name(void *buf
, Elf64_Word type
, void *desc
, int d_len
,
316 note
= (Elf64_Nhdr
*)buf
;
317 note
->n_namesz
= strlen(name
) + 1;
318 note
->n_descsz
= d_len
;
320 len
= sizeof(Elf64_Nhdr
);
322 memcpy(buf
+ len
, name
, note
->n_namesz
);
323 len
= roundup(len
+ note
->n_namesz
, 4);
325 memcpy(buf
+ len
, desc
, note
->n_descsz
);
326 len
= roundup(len
+ note
->n_descsz
, 4);
328 return PTR_ADD(buf
, len
);
331 static inline void *nt_init(void *buf
, Elf64_Word type
, void *desc
, int d_len
)
333 return nt_init_name(buf
, type
, desc
, d_len
, nt_name(type
));
337 * Calculate the size of ELF note
339 static size_t nt_size_name(int d_len
, const char *name
)
343 size
= sizeof(Elf64_Nhdr
);
344 size
+= roundup(strlen(name
) + 1, 4);
345 size
+= roundup(d_len
, 4);
350 static inline size_t nt_size(Elf64_Word type
, int d_len
)
352 return nt_size_name(d_len
, nt_name(type
));
356 * Fill ELF notes for one CPU with save area registers
358 static void *fill_cpu_elf_notes(void *ptr
, int cpu
, struct save_area
*sa
)
360 struct elf_prstatus nt_prstatus
;
361 elf_fpregset_t nt_fpregset
;
363 /* Prepare prstatus note */
364 memset(&nt_prstatus
, 0, sizeof(nt_prstatus
));
365 memcpy(&nt_prstatus
.pr_reg
.gprs
, sa
->gprs
, sizeof(sa
->gprs
));
366 memcpy(&nt_prstatus
.pr_reg
.psw
, sa
->psw
, sizeof(sa
->psw
));
367 memcpy(&nt_prstatus
.pr_reg
.acrs
, sa
->acrs
, sizeof(sa
->acrs
));
368 nt_prstatus
.pr_pid
= cpu
;
369 /* Prepare fpregset (floating point) note */
370 memset(&nt_fpregset
, 0, sizeof(nt_fpregset
));
371 memcpy(&nt_fpregset
.fpc
, &sa
->fpc
, sizeof(sa
->fpc
));
372 memcpy(&nt_fpregset
.fprs
, &sa
->fprs
, sizeof(sa
->fprs
));
373 /* Create ELF notes for the CPU */
374 ptr
= nt_init(ptr
, NT_PRSTATUS
, &nt_prstatus
, sizeof(nt_prstatus
));
375 ptr
= nt_init(ptr
, NT_PRFPREG
, &nt_fpregset
, sizeof(nt_fpregset
));
376 ptr
= nt_init(ptr
, NT_S390_TIMER
, &sa
->timer
, sizeof(sa
->timer
));
377 ptr
= nt_init(ptr
, NT_S390_TODCMP
, &sa
->todcmp
, sizeof(sa
->todcmp
));
378 ptr
= nt_init(ptr
, NT_S390_TODPREG
, &sa
->todpreg
, sizeof(sa
->todpreg
));
379 ptr
= nt_init(ptr
, NT_S390_CTRS
, &sa
->ctrs
, sizeof(sa
->ctrs
));
380 ptr
= nt_init(ptr
, NT_S390_PREFIX
, &sa
->prefix
, sizeof(sa
->prefix
));
381 if (MACHINE_HAS_VX
) {
382 ptr
= nt_init(ptr
, NT_S390_VXRS_HIGH
,
383 &sa
->vxrs_high
, sizeof(sa
->vxrs_high
));
384 ptr
= nt_init(ptr
, NT_S390_VXRS_LOW
,
385 &sa
->vxrs_low
, sizeof(sa
->vxrs_low
));
391 * Calculate size of ELF notes per cpu
393 static size_t get_cpu_elf_notes_size(void)
395 struct save_area
*sa
= NULL
;
398 size
= nt_size(NT_PRSTATUS
, sizeof(struct elf_prstatus
));
399 size
+= nt_size(NT_PRFPREG
, sizeof(elf_fpregset_t
));
400 size
+= nt_size(NT_S390_TIMER
, sizeof(sa
->timer
));
401 size
+= nt_size(NT_S390_TODCMP
, sizeof(sa
->todcmp
));
402 size
+= nt_size(NT_S390_TODPREG
, sizeof(sa
->todpreg
));
403 size
+= nt_size(NT_S390_CTRS
, sizeof(sa
->ctrs
));
404 size
+= nt_size(NT_S390_PREFIX
, sizeof(sa
->prefix
));
405 if (MACHINE_HAS_VX
) {
406 size
+= nt_size(NT_S390_VXRS_HIGH
, sizeof(sa
->vxrs_high
));
407 size
+= nt_size(NT_S390_VXRS_LOW
, sizeof(sa
->vxrs_low
));
414 * Initialize prpsinfo note (new kernel)
416 static void *nt_prpsinfo(void *ptr
)
418 struct elf_prpsinfo prpsinfo
;
420 memset(&prpsinfo
, 0, sizeof(prpsinfo
));
421 prpsinfo
.pr_sname
= 'R';
422 strcpy(prpsinfo
.pr_fname
, "vmlinux");
423 return nt_init(ptr
, NT_PRPSINFO
, &prpsinfo
, sizeof(prpsinfo
));
427 * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
429 static void *get_vmcoreinfo_old(unsigned long *size
)
431 char nt_name
[11], *vmcoreinfo
;
435 if (copy_oldmem_kernel(&addr
, &S390_lowcore
.vmcore_info
, sizeof(addr
)))
437 memset(nt_name
, 0, sizeof(nt_name
));
438 if (copy_oldmem_kernel(¬e
, addr
, sizeof(note
)))
440 if (copy_oldmem_kernel(nt_name
, addr
+ sizeof(note
),
441 sizeof(nt_name
) - 1))
443 if (strcmp(nt_name
, VMCOREINFO_NOTE_NAME
) != 0)
445 vmcoreinfo
= kzalloc(note
.n_descsz
, GFP_KERNEL
);
448 if (copy_oldmem_kernel(vmcoreinfo
, addr
+ 24, note
.n_descsz
)) {
452 *size
= note
.n_descsz
;
457 * Initialize vmcoreinfo note (new kernel)
459 static void *nt_vmcoreinfo(void *ptr
)
461 const char *name
= VMCOREINFO_NOTE_NAME
;
465 vmcoreinfo
= os_info_old_entry(OS_INFO_VMCOREINFO
, &size
);
467 return nt_init_name(ptr
, 0, vmcoreinfo
, size
, name
);
469 vmcoreinfo
= get_vmcoreinfo_old(&size
);
472 ptr
= nt_init_name(ptr
, 0, vmcoreinfo
, size
, name
);
477 static size_t nt_vmcoreinfo_size(void)
479 const char *name
= VMCOREINFO_NOTE_NAME
;
483 vmcoreinfo
= os_info_old_entry(OS_INFO_VMCOREINFO
, &size
);
485 return nt_size_name(size
, name
);
487 vmcoreinfo
= get_vmcoreinfo_old(&size
);
492 return nt_size_name(size
, name
);
496 * Initialize final note (needed for /proc/vmcore code)
498 static void *nt_final(void *ptr
)
502 note
= (Elf64_Nhdr
*) ptr
;
506 return PTR_ADD(ptr
, sizeof(Elf64_Nhdr
));
510 * Initialize ELF header (new kernel)
512 static void *ehdr_init(Elf64_Ehdr
*ehdr
, int mem_chunk_cnt
)
514 memset(ehdr
, 0, sizeof(*ehdr
));
515 memcpy(ehdr
->e_ident
, ELFMAG
, SELFMAG
);
516 ehdr
->e_ident
[EI_CLASS
] = ELFCLASS64
;
517 ehdr
->e_ident
[EI_DATA
] = ELFDATA2MSB
;
518 ehdr
->e_ident
[EI_VERSION
] = EV_CURRENT
;
519 memset(ehdr
->e_ident
+ EI_PAD
, 0, EI_NIDENT
- EI_PAD
);
520 ehdr
->e_type
= ET_CORE
;
521 ehdr
->e_machine
= EM_S390
;
522 ehdr
->e_version
= EV_CURRENT
;
523 ehdr
->e_phoff
= sizeof(Elf64_Ehdr
);
524 ehdr
->e_ehsize
= sizeof(Elf64_Ehdr
);
525 ehdr
->e_phentsize
= sizeof(Elf64_Phdr
);
526 ehdr
->e_phnum
= mem_chunk_cnt
+ 1;
531 * Return CPU count for ELF header (new kernel)
533 static int get_cpu_cnt(void)
535 struct save_area
*sa
;
538 list_for_each_entry(sa
, &dump_save_areas
, list
)
545 * Return memory chunk count for ELF header (new kernel)
547 static int get_mem_chunk_cnt(void)
552 for_each_physmem_range(idx
, &oldmem_type
, NULL
, NULL
)
558 * Initialize ELF loads (new kernel)
560 static void loads_init(Elf64_Phdr
*phdr
, u64 loads_offset
)
562 phys_addr_t start
, end
;
565 for_each_physmem_range(idx
, &oldmem_type
, &start
, &end
) {
566 phdr
->p_filesz
= end
- start
;
567 phdr
->p_type
= PT_LOAD
;
568 phdr
->p_offset
= start
;
569 phdr
->p_vaddr
= start
;
570 phdr
->p_paddr
= start
;
571 phdr
->p_memsz
= end
- start
;
572 phdr
->p_flags
= PF_R
| PF_W
| PF_X
;
573 phdr
->p_align
= PAGE_SIZE
;
579 * Initialize notes (new kernel)
581 static void *notes_init(Elf64_Phdr
*phdr
, void *ptr
, u64 notes_offset
)
583 struct save_area
*sa
;
584 void *ptr_start
= ptr
;
587 ptr
= nt_prpsinfo(ptr
);
590 list_for_each_entry(sa
, &dump_save_areas
, list
)
592 ptr
= fill_cpu_elf_notes(ptr
, cpu
++, sa
);
593 ptr
= nt_vmcoreinfo(ptr
);
595 memset(phdr
, 0, sizeof(*phdr
));
596 phdr
->p_type
= PT_NOTE
;
597 phdr
->p_offset
= notes_offset
;
598 phdr
->p_filesz
= (unsigned long) PTR_SUB(ptr
, ptr_start
);
599 phdr
->p_memsz
= phdr
->p_filesz
;
603 static size_t get_elfcorehdr_size(int mem_chunk_cnt
)
607 size
= sizeof(Elf64_Ehdr
);
609 size
+= sizeof(Elf64_Phdr
);
611 size
+= nt_size(NT_PRPSINFO
, sizeof(struct elf_prpsinfo
));
613 size
+= get_cpu_cnt() * get_cpu_elf_notes_size();
615 size
+= nt_vmcoreinfo_size();
617 size
+= sizeof(Elf64_Nhdr
);
619 size
+= mem_chunk_cnt
* sizeof(Elf64_Phdr
);
625 * Create ELF core header (new kernel)
627 int elfcorehdr_alloc(unsigned long long *addr
, unsigned long long *size
)
629 Elf64_Phdr
*phdr_notes
, *phdr_loads
;
635 /* If we are not in kdump or zfcp/nvme dump mode return */
636 if (!OLDMEM_BASE
&& !is_ipl_type_dump())
638 /* If we cannot get HSA size for zfcp/nvme dump return error */
639 if (is_ipl_type_dump() && !sclp
.hsa_size
)
642 /* For kdump, exclude previous crashkernel memory */
644 oldmem_region
.base
= OLDMEM_BASE
;
645 oldmem_region
.size
= OLDMEM_SIZE
;
646 oldmem_type
.total_size
= OLDMEM_SIZE
;
649 mem_chunk_cnt
= get_mem_chunk_cnt();
651 alloc_size
= get_elfcorehdr_size(mem_chunk_cnt
);
653 hdr
= kzalloc(alloc_size
, GFP_KERNEL
);
655 /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
656 * a dump with this crash kernel will fail. Panic now to allow other
657 * dump mechanisms to take over.
660 panic("s390 kdump allocating elfcorehdr failed");
662 /* Init elf header */
663 ptr
= ehdr_init(hdr
, mem_chunk_cnt
);
664 /* Init program headers */
666 ptr
= PTR_ADD(ptr
, sizeof(Elf64_Phdr
));
668 ptr
= PTR_ADD(ptr
, sizeof(Elf64_Phdr
) * mem_chunk_cnt
);
670 hdr_off
= PTR_DIFF(ptr
, hdr
);
671 ptr
= notes_init(phdr_notes
, ptr
, ((unsigned long) hdr
) + hdr_off
);
673 hdr_off
= PTR_DIFF(ptr
, hdr
);
674 loads_init(phdr_loads
, hdr_off
);
675 *addr
= (unsigned long long) hdr
;
676 *size
= (unsigned long long) hdr_off
;
677 BUG_ON(elfcorehdr_size
> alloc_size
);
/*
 * Free ELF core header (new kernel)
 *
 * @addr: address of the header, passed as an integer
 */
void elfcorehdr_free(unsigned long long addr)
{
	void *hdr = (void *)(unsigned long)addr;

	kfree(hdr);
}
690 * Read from ELF header
692 ssize_t
elfcorehdr_read(char *buf
, size_t count
, u64
*ppos
)
694 void *src
= (void *)(unsigned long)*ppos
;
696 memcpy(buf
, src
, count
);
702 * Read from ELF notes data
704 ssize_t
elfcorehdr_read_notes(char *buf
, size_t count
, u64
*ppos
)
706 void *src
= (void *)(unsigned long)*ppos
;
708 memcpy(buf
, src
, count
);