2 * Hypervisor-assisted dump
4 * Linas Vepstas, Manish Ahuja 2008
5 * Copyright 2008 IBM Corp.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/pfn.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/sysfs.h>

#include <asm/phyp_dump.h>
#include <asm/machdep.h>
29 /* Variables, used to communicate data between early boot and late boot */
30 static struct phyp_dump phyp_dump_vars
;
31 struct phyp_dump
*phyp_dump_info
= &phyp_dump_vars
;
33 static int ibm_configure_kernel_dump
;
34 /* ------------------------------------------------- */
35 /* RTAS interfaces to declare the dump regions */
44 u64 destination_address
;
47 struct phyp_dump_header
{
52 u32 first_offset_section
;
53 u32 dump_disk_section
;
58 /* No dump disk path string used */
60 struct dump_section cpu_data
;
61 struct dump_section hpte_data
;
62 struct dump_section kernel_data
;
65 /* The dump header *must be* in low memory, so .bss it */
66 static struct phyp_dump_header phdr
;
68 #define NUM_DUMP_SECTIONS 3
69 #define DUMP_HEADER_VERSION 0x1
70 #define DUMP_REQUEST_FLAG 0x1
71 #define DUMP_SOURCE_CPU 0x0001
72 #define DUMP_SOURCE_HPTE 0x0002
73 #define DUMP_SOURCE_RMO 0x0011
74 #define DUMP_ERROR_FLAG 0x2000
75 #define DUMP_TRIGGERED 0x4000
76 #define DUMP_PERFORMED 0x8000
80 * init_dump_header() - initialize the header declaring a dump
81 * Returns: length of dump save area.
83 * When the hypervisor saves crashed state, it needs to put
84 * it somewhere. The dump header tells the hypervisor where
85 * the data can be saved.
87 static unsigned long init_dump_header(struct phyp_dump_header
*ph
)
89 unsigned long addr_offset
= 0;
91 /* Set up the dump header */
92 ph
->version
= DUMP_HEADER_VERSION
;
93 ph
->num_of_sections
= NUM_DUMP_SECTIONS
;
96 ph
->first_offset_section
=
97 (u32
)offsetof(struct phyp_dump_header
, cpu_data
);
98 ph
->dump_disk_section
= 0;
100 ph
->num_of_blocks_dd
= 0;
103 ph
->maxtime_to_auto
= 0; /* disabled */
105 /* The first two sections are mandatory */
106 ph
->cpu_data
.dump_flags
= DUMP_REQUEST_FLAG
;
107 ph
->cpu_data
.source_type
= DUMP_SOURCE_CPU
;
108 ph
->cpu_data
.source_address
= 0;
109 ph
->cpu_data
.source_length
= phyp_dump_info
->cpu_state_size
;
110 ph
->cpu_data
.destination_address
= addr_offset
;
111 addr_offset
+= phyp_dump_info
->cpu_state_size
;
113 ph
->hpte_data
.dump_flags
= DUMP_REQUEST_FLAG
;
114 ph
->hpte_data
.source_type
= DUMP_SOURCE_HPTE
;
115 ph
->hpte_data
.source_address
= 0;
116 ph
->hpte_data
.source_length
= phyp_dump_info
->hpte_region_size
;
117 ph
->hpte_data
.destination_address
= addr_offset
;
118 addr_offset
+= phyp_dump_info
->hpte_region_size
;
120 /* This section describes the low kernel region */
121 ph
->kernel_data
.dump_flags
= DUMP_REQUEST_FLAG
;
122 ph
->kernel_data
.source_type
= DUMP_SOURCE_RMO
;
123 ph
->kernel_data
.source_address
= PHYP_DUMP_RMR_START
;
124 ph
->kernel_data
.source_length
= PHYP_DUMP_RMR_END
;
125 ph
->kernel_data
.destination_address
= addr_offset
;
126 addr_offset
+= ph
->kernel_data
.source_length
;
131 static void print_dump_header(const struct phyp_dump_header
*ph
)
137 printk(KERN_INFO
"dump header:\n");
138 /* setup some ph->sections required */
139 printk(KERN_INFO
"version = %d\n", ph
->version
);
140 printk(KERN_INFO
"Sections = %d\n", ph
->num_of_sections
);
141 printk(KERN_INFO
"Status = 0x%x\n", ph
->status
);
143 /* No ph->disk, so all should be set to 0 */
144 printk(KERN_INFO
"Offset to first section 0x%x\n",
145 ph
->first_offset_section
);
146 printk(KERN_INFO
"dump disk sections should be zero\n");
147 printk(KERN_INFO
"dump disk section = %d\n", ph
->dump_disk_section
);
148 printk(KERN_INFO
"block num = %lld\n", ph
->block_num_dd
);
149 printk(KERN_INFO
"number of blocks = %lld\n", ph
->num_of_blocks_dd
);
150 printk(KERN_INFO
"dump disk offset = %d\n", ph
->offset_dd
);
151 printk(KERN_INFO
"Max auto time= %d\n", ph
->maxtime_to_auto
);
153 /*set cpu state and hpte states as well scratch pad area */
154 printk(KERN_INFO
" CPU AREA\n");
155 printk(KERN_INFO
"cpu dump_flags =%d\n", ph
->cpu_data
.dump_flags
);
156 printk(KERN_INFO
"cpu source_type =%d\n", ph
->cpu_data
.source_type
);
157 printk(KERN_INFO
"cpu error_flags =%d\n", ph
->cpu_data
.error_flags
);
158 printk(KERN_INFO
"cpu source_address =%llx\n",
159 ph
->cpu_data
.source_address
);
160 printk(KERN_INFO
"cpu source_length =%llx\n",
161 ph
->cpu_data
.source_length
);
162 printk(KERN_INFO
"cpu length_copied =%llx\n",
163 ph
->cpu_data
.length_copied
);
165 printk(KERN_INFO
" HPTE AREA\n");
166 printk(KERN_INFO
"HPTE dump_flags =%d\n", ph
->hpte_data
.dump_flags
);
167 printk(KERN_INFO
"HPTE source_type =%d\n", ph
->hpte_data
.source_type
);
168 printk(KERN_INFO
"HPTE error_flags =%d\n", ph
->hpte_data
.error_flags
);
169 printk(KERN_INFO
"HPTE source_address =%llx\n",
170 ph
->hpte_data
.source_address
);
171 printk(KERN_INFO
"HPTE source_length =%llx\n",
172 ph
->hpte_data
.source_length
);
173 printk(KERN_INFO
"HPTE length_copied =%llx\n",
174 ph
->hpte_data
.length_copied
);
176 printk(KERN_INFO
" SRSD AREA\n");
177 printk(KERN_INFO
"SRSD dump_flags =%d\n", ph
->kernel_data
.dump_flags
);
178 printk(KERN_INFO
"SRSD source_type =%d\n", ph
->kernel_data
.source_type
);
179 printk(KERN_INFO
"SRSD error_flags =%d\n", ph
->kernel_data
.error_flags
);
180 printk(KERN_INFO
"SRSD source_address =%llx\n",
181 ph
->kernel_data
.source_address
);
182 printk(KERN_INFO
"SRSD source_length =%llx\n",
183 ph
->kernel_data
.source_length
);
184 printk(KERN_INFO
"SRSD length_copied =%llx\n",
185 ph
->kernel_data
.length_copied
);
189 static ssize_t
show_phyp_dump_active(struct kobject
*kobj
,
190 struct kobj_attribute
*attr
, char *buf
)
193 /* create filesystem entry so kdump is phyp-dump aware */
194 return sprintf(buf
, "%lx\n", phyp_dump_info
->phyp_dump_at_boot
);
197 static struct kobj_attribute pdl
= __ATTR(phyp_dump_active
, 0600,
198 show_phyp_dump_active
,
201 static void register_dump_area(struct phyp_dump_header
*ph
, unsigned long addr
)
205 /* Add addr value if not initialized before */
206 if (ph
->cpu_data
.destination_address
== 0) {
207 ph
->cpu_data
.destination_address
+= addr
;
208 ph
->hpte_data
.destination_address
+= addr
;
209 ph
->kernel_data
.destination_address
+= addr
;
212 /* ToDo Invalidate kdump and free memory range. */
215 rc
= rtas_call(ibm_configure_kernel_dump
, 3, 1, NULL
,
216 1, ph
, sizeof(struct phyp_dump_header
));
217 } while (rtas_busy_delay(rc
));
220 printk(KERN_ERR
"phyp-dump: unexpected error (%d) on "
222 print_dump_header(ph
);
226 rc
= sysfs_create_file(kernel_kobj
, &pdl
.attr
);
228 printk(KERN_ERR
"phyp-dump: unable to create sysfs"
233 void invalidate_last_dump(struct phyp_dump_header
*ph
, unsigned long addr
)
237 /* Add addr value if not initialized before */
238 if (ph
->cpu_data
.destination_address
== 0) {
239 ph
->cpu_data
.destination_address
+= addr
;
240 ph
->hpte_data
.destination_address
+= addr
;
241 ph
->kernel_data
.destination_address
+= addr
;
245 rc
= rtas_call(ibm_configure_kernel_dump
, 3, 1, NULL
,
246 2, ph
, sizeof(struct phyp_dump_header
));
247 } while (rtas_busy_delay(rc
));
250 printk(KERN_ERR
"phyp-dump: unexpected error (%d) "
251 "on invalidate\n", rc
);
252 print_dump_header(ph
);
256 /* ------------------------------------------------- */
258 * release_memory_range -- release memory previously memblock_reserved
259 * @start_pfn: starting physical frame number
260 * @nr_pages: number of pages to free.
262 * This routine will release memory that had been previously
263 * memblock_reserved in early boot. The released memory becomes
264 * available for genreal use.
266 static void release_memory_range(unsigned long start_pfn
,
267 unsigned long nr_pages
)
270 unsigned long end_pfn
;
273 end_pfn
= start_pfn
+ nr_pages
;
275 for (i
= start_pfn
; i
<= end_pfn
; i
++) {
276 rpage
= pfn_to_page(i
);
277 if (PageReserved(rpage
)) {
278 ClearPageReserved(rpage
);
279 init_page_count(rpage
);
287 * track_freed_range -- Counts the range being freed.
288 * Once the counter goes to zero, it re-registers dump for
292 track_freed_range(unsigned long addr
, unsigned long length
)
294 static unsigned long scratch_area_size
, reserved_area_size
;
296 if (addr
< phyp_dump_info
->init_reserve_start
)
299 if ((addr
>= phyp_dump_info
->init_reserve_start
) &&
300 (addr
<= phyp_dump_info
->init_reserve_start
+
301 phyp_dump_info
->init_reserve_size
))
302 reserved_area_size
+= length
;
304 if ((addr
>= phyp_dump_info
->reserved_scratch_addr
) &&
305 (addr
<= phyp_dump_info
->reserved_scratch_addr
+
306 phyp_dump_info
->reserved_scratch_size
))
307 scratch_area_size
+= length
;
309 if ((reserved_area_size
== phyp_dump_info
->init_reserve_size
) &&
310 (scratch_area_size
== phyp_dump_info
->reserved_scratch_size
)) {
312 invalidate_last_dump(&phdr
,
313 phyp_dump_info
->reserved_scratch_addr
);
314 register_dump_area(&phdr
,
315 phyp_dump_info
->reserved_scratch_addr
);
319 /* ------------------------------------------------- */
321 * sysfs_release_region -- sysfs interface to release memory range.
324 * "echo <start addr> <length> > /sys/kernel/release_region"
327 * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
329 * will release 256MB starting at 1GB.
331 static ssize_t
store_release_region(struct kobject
*kobj
,
332 struct kobj_attribute
*attr
,
333 const char *buf
, size_t count
)
335 unsigned long start_addr
, length
, end_addr
;
336 unsigned long start_pfn
, nr_pages
;
339 ret
= sscanf(buf
, "%lx %lx", &start_addr
, &length
);
343 track_freed_range(start_addr
, length
);
345 /* Range-check - don't free any reserved memory that
346 * wasn't reserved for phyp-dump */
347 if (start_addr
< phyp_dump_info
->init_reserve_start
)
348 start_addr
= phyp_dump_info
->init_reserve_start
;
350 end_addr
= phyp_dump_info
->init_reserve_start
+
351 phyp_dump_info
->init_reserve_size
;
352 if (start_addr
+length
> end_addr
)
353 length
= end_addr
- start_addr
;
355 /* Release the region of memory assed in by user */
356 start_pfn
= PFN_DOWN(start_addr
);
357 nr_pages
= PFN_DOWN(length
);
358 release_memory_range(start_pfn
, nr_pages
);
363 static ssize_t
show_release_region(struct kobject
*kobj
,
364 struct kobj_attribute
*attr
, char *buf
)
366 u64 second_addr_range
;
368 /* total reserved size - start of scratch area */
369 second_addr_range
= phyp_dump_info
->init_reserve_size
-
370 phyp_dump_info
->reserved_scratch_size
;
371 return sprintf(buf
, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
372 " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
373 phdr
.cpu_data
.destination_address
,
374 phdr
.cpu_data
.length_copied
,
375 phdr
.hpte_data
.destination_address
,
376 phdr
.hpte_data
.length_copied
,
377 phdr
.kernel_data
.destination_address
,
378 phdr
.kernel_data
.length_copied
,
379 phyp_dump_info
->init_reserve_start
,
383 static struct kobj_attribute rr
= __ATTR(release_region
, 0600,
385 store_release_region
);
387 static int __init
phyp_dump_setup(void)
389 struct device_node
*rtas
;
390 const struct phyp_dump_header
*dump_header
= NULL
;
391 unsigned long dump_area_start
;
392 unsigned long dump_area_length
;
396 /* If no memory was reserved in early boot, there is nothing to do */
397 if (phyp_dump_info
->init_reserve_size
== 0)
400 /* Return if phyp dump not supported */
401 if (!phyp_dump_info
->phyp_dump_configured
)
404 /* Is there dump data waiting for us? If there isn't,
405 * then register a new dump area, and release all of
406 * the rest of the reserved ram.
408 * The /rtas/ibm,kernel-dump rtas node is present only
409 * if there is dump data waiting for us.
411 rtas
= of_find_node_by_path("/rtas");
413 dump_header
= of_get_property(rtas
, "ibm,kernel-dump",
418 ibm_configure_kernel_dump
= rtas_token("ibm,configure-kernel-dump");
420 print_dump_header(dump_header
);
421 dump_area_length
= init_dump_header(&phdr
);
423 dump_area_start
= phyp_dump_info
->init_reserve_start
& PAGE_MASK
;
425 if (dump_header
== NULL
) {
426 register_dump_area(&phdr
, dump_area_start
);
430 /* re-register the dump area, if old dump was invalid */
431 if ((dump_header
) && (dump_header
->status
& DUMP_ERROR_FLAG
)) {
432 invalidate_last_dump(&phdr
, dump_area_start
);
433 register_dump_area(&phdr
, dump_area_start
);
438 phyp_dump_info
->reserved_scratch_addr
=
439 dump_header
->cpu_data
.destination_address
;
440 phyp_dump_info
->reserved_scratch_size
=
441 dump_header
->cpu_data
.source_length
+
442 dump_header
->hpte_data
.source_length
+
443 dump_header
->kernel_data
.source_length
;
446 /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
447 rc
= sysfs_create_file(kernel_kobj
, &rr
.attr
);
449 printk(KERN_ERR
"phyp-dump: unable to create sysfs file (%d)\n",
452 /* ToDo: re-register the dump area, for next time. */
455 machine_subsys_initcall(pseries
, phyp_dump_setup
);
457 int __init
early_init_dt_scan_phyp_dump(unsigned long node
,
458 const char *uname
, int depth
, void *data
)
460 const unsigned int *sizes
;
462 phyp_dump_info
->phyp_dump_configured
= 0;
463 phyp_dump_info
->phyp_dump_is_active
= 0;
465 if (depth
!= 1 || strcmp(uname
, "rtas") != 0)
468 if (of_get_flat_dt_prop(node
, "ibm,configure-kernel-dump", NULL
))
469 phyp_dump_info
->phyp_dump_configured
++;
471 if (of_get_flat_dt_prop(node
, "ibm,dump-kernel", NULL
))
472 phyp_dump_info
->phyp_dump_is_active
++;
474 sizes
= of_get_flat_dt_prop(node
, "ibm,configure-kernel-dump-sizes",
480 phyp_dump_info
->cpu_state_size
= *((unsigned long *)&sizes
[1]);
483 phyp_dump_info
->hpte_region_size
=
484 *((unsigned long *)&sizes
[4]);
488 /* Look for phyp_dump= cmdline option */
489 static int __init
early_phyp_dump_enabled(char *p
)
491 phyp_dump_info
->phyp_dump_at_boot
= 1;
496 if (strncmp(p
, "1", 1) == 0)
497 phyp_dump_info
->phyp_dump_at_boot
= 1;
498 else if (strncmp(p
, "0", 1) == 0)
499 phyp_dump_info
->phyp_dump_at_boot
= 0;
503 early_param("phyp_dump", early_phyp_dump_enabled
);
505 /* Look for phyp_dump_reserve_size= cmdline option */
506 static int __init
early_phyp_dump_reserve_size(char *p
)
509 phyp_dump_info
->reserve_bootvar
= memparse(p
, &p
);
513 early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size
);