2 * c 2001 PPC 64 Team, IBM Corp
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * /dev/nvram driver for PPC64
11 * This perhaps should live in drivers/char
15 #include <linux/types.h>
16 #include <linux/errno.h>
17 #include <linux/init.h>
18 #include <linux/spinlock.h>
19 #include <linux/slab.h>
20 #include <linux/kmsg_dump.h>
21 #include <linux/pstore.h>
22 #include <linux/ctype.h>
23 #include <linux/zlib.h>
24 #include <asm/uaccess.h>
25 #include <asm/nvram.h>
28 #include <asm/machdep.h>
30 /* Max bytes to read/write in one go */
34 * Set oops header version to distinguish between old and new format header.
35 * lnx,oops-log partition max size is 4000, header version > 4000 will
36 * help in identifying new header.
38 #define OOPS_HDR_VERSION 5000
40 static unsigned int nvram_size
;
41 static int nvram_fetch
, nvram_store
;
42 static char nvram_buf
[NVRW_CNT
]; /* assume this is in the first 4GB */
43 static DEFINE_SPINLOCK(nvram_lock
);
50 struct nvram_os_partition
{
52 int req_size
; /* desired size, in bytes */
53 int min_size
; /* minimum acceptable size (0 means req_size) */
54 long size
; /* size of data portion (excluding err_log_info) */
55 long index
; /* offset of data portion of partition */
56 bool os_partition
; /* partition initialized by OS, not FW */
59 static struct nvram_os_partition rtas_log_partition
= {
60 .name
= "ibm,rtas-log",
67 static struct nvram_os_partition oops_log_partition
= {
68 .name
= "lnx,oops-log",
75 static const char *pseries_nvram_os_partitions
[] = {
81 struct oops_log_info
{
85 } __attribute__((packed
));
87 static void oops_to_nvram(struct kmsg_dumper
*dumper
,
88 enum kmsg_dump_reason reason
);
90 static struct kmsg_dumper nvram_kmsg_dumper
= {
94 /* See clobbering_unread_rtas_event() */
95 #define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
96 static unsigned long last_unread_rtas_event
; /* timestamp */
99 * For capturing and compressing an oops or panic report...
101 * big_oops_buf[] holds the uncompressed text we're capturing.
103 * oops_buf[] holds the compressed text, preceded by an oops header.
104 * oops header has u16 holding the version of oops header (to differentiate
105 * between old and new format header) followed by u16 holding the length of
106 * the compressed* text (*Or uncompressed, if compression fails.) and u64
107 * holding the timestamp. oops_buf[] gets written to NVRAM.
109 * oops_log_info points to the header. oops_data points to the compressed text.
114 * +-----------+-----------+-----------+------------------------+
115 * | version | length | timestamp | text |
116 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
117 * +-----------+-----------+-----------+------------------------+
121 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
123 static size_t big_oops_buf_sz
;
124 static char *big_oops_buf
, *oops_buf
;
125 static char *oops_data
;
126 static size_t oops_data_sz
;
128 /* Compression parameters */
129 #define COMPR_LEVEL 6
130 #define WINDOW_BITS 12
132 static struct z_stream_s stream
;
135 static struct nvram_os_partition of_config_partition
= {
138 .os_partition
= false
141 static struct nvram_os_partition common_partition
= {
144 .os_partition
= false
147 static enum pstore_type_id nvram_type_ids
[] = {
149 PSTORE_TYPE_PPC_RTAS
,
151 PSTORE_TYPE_PPC_COMMON
,
154 static int read_type
;
155 static unsigned long last_rtas_event
;
158 static ssize_t
pSeries_nvram_read(char *buf
, size_t count
, loff_t
*index
)
167 if (nvram_size
== 0 || nvram_fetch
== RTAS_UNKNOWN_SERVICE
)
170 if (*index
>= nvram_size
)
174 if (i
+ count
> nvram_size
)
175 count
= nvram_size
- i
;
177 spin_lock_irqsave(&nvram_lock
, flags
);
179 for (; count
!= 0; count
-= len
) {
184 if ((rtas_call(nvram_fetch
, 3, 2, &done
, i
, __pa(nvram_buf
),
185 len
) != 0) || len
!= done
) {
186 spin_unlock_irqrestore(&nvram_lock
, flags
);
190 memcpy(p
, nvram_buf
, len
);
196 spin_unlock_irqrestore(&nvram_lock
, flags
);
202 static ssize_t
pSeries_nvram_write(char *buf
, size_t count
, loff_t
*index
)
210 if (nvram_size
== 0 || nvram_store
== RTAS_UNKNOWN_SERVICE
)
213 if (*index
>= nvram_size
)
217 if (i
+ count
> nvram_size
)
218 count
= nvram_size
- i
;
220 spin_lock_irqsave(&nvram_lock
, flags
);
222 for (; count
!= 0; count
-= len
) {
227 memcpy(nvram_buf
, p
, len
);
229 if ((rtas_call(nvram_store
, 3, 2, &done
, i
, __pa(nvram_buf
),
230 len
) != 0) || len
!= done
) {
231 spin_unlock_irqrestore(&nvram_lock
, flags
);
238 spin_unlock_irqrestore(&nvram_lock
, flags
);
244 static ssize_t
pSeries_nvram_get_size(void)
246 return nvram_size
? nvram_size
: -ENODEV
;
250 /* nvram_write_os_partition, nvram_write_error_log
252 * We need to buffer the error logs into nvram to ensure that we have
253 * the failure information to decode. If we have a severe error there
254 * is no way to guarantee that the OS or the machine is in a state to
255 * get back to user land and write the error to disk. For example if
256 * the SCSI device driver causes a Machine Check by writing to a bad
257 * IO address, there is no way of guaranteeing that the device driver
258 * is in any state that it would also be able to write the error data
259 * captured to disk, thus we buffer it in NVRAM for analysis on the
262 * In NVRAM the partition containing the error log buffer will look like:
264 * +-----------+----------+--------+------------+------------------+
265 * | signature | checksum | length | name | data |
266 * |0 |1 |2 3|4 15|16 length-1|
267 * +-----------+----------+--------+------------+------------------+
269 * The 'data' section would look like (in bytes):
270 * +--------------+------------+-----------------------------------+
271 * | event_logged | sequence # | error log |
272 * |0 3|4 7|8 error_log_size-1|
273 * +--------------+------------+-----------------------------------+
275 * event_logged: 0 if event has not been logged to syslog, 1 if it has
276 * sequence #: The unique sequence # for each event. (until it wraps)
277 * error log: The error log from event_scan
279 int nvram_write_os_partition(struct nvram_os_partition
*part
, char * buff
,
280 int length
, unsigned int err_type
, unsigned int error_log_cnt
)
284 struct err_log_info info
;
286 if (part
->index
== -1) {
290 if (length
> part
->size
) {
294 info
.error_type
= cpu_to_be32(err_type
);
295 info
.seq_num
= cpu_to_be32(error_log_cnt
);
297 tmp_index
= part
->index
;
299 rc
= ppc_md
.nvram_write((char *)&info
, sizeof(struct err_log_info
), &tmp_index
);
301 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__
, rc
);
305 rc
= ppc_md
.nvram_write(buff
, length
, &tmp_index
);
307 pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__
, rc
);
314 int nvram_write_error_log(char * buff
, int length
,
315 unsigned int err_type
, unsigned int error_log_cnt
)
317 int rc
= nvram_write_os_partition(&rtas_log_partition
, buff
, length
,
318 err_type
, error_log_cnt
);
320 last_unread_rtas_event
= get_seconds();
322 last_rtas_event
= get_seconds();
329 /* nvram_read_partition
331 * Reads nvram partition for at most 'length'
333 int nvram_read_partition(struct nvram_os_partition
*part
, char *buff
,
334 int length
, unsigned int *err_type
,
335 unsigned int *error_log_cnt
)
339 struct err_log_info info
;
341 if (part
->index
== -1)
344 if (length
> part
->size
)
347 tmp_index
= part
->index
;
349 if (part
->os_partition
) {
350 rc
= ppc_md
.nvram_read((char *)&info
,
351 sizeof(struct err_log_info
),
354 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__
,
360 rc
= ppc_md
.nvram_read(buff
, length
, &tmp_index
);
362 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__
, rc
);
366 if (part
->os_partition
) {
367 *error_log_cnt
= be32_to_cpu(info
.seq_num
);
368 *err_type
= be32_to_cpu(info
.error_type
);
374 /* nvram_read_error_log
376 * Reads nvram for error log for at most 'length'
378 int nvram_read_error_log(char *buff
, int length
,
379 unsigned int *err_type
, unsigned int *error_log_cnt
)
381 return nvram_read_partition(&rtas_log_partition
, buff
, length
,
382 err_type
, error_log_cnt
);
385 /* This doesn't actually zero anything, but it sets the event_logged
386 * word to tell that this event is safely in syslog.
388 int nvram_clear_error_log(void)
391 int clear_word
= ERR_FLAG_ALREADY_LOGGED
;
394 if (rtas_log_partition
.index
== -1)
397 tmp_index
= rtas_log_partition
.index
;
399 rc
= ppc_md
.nvram_write((char *)&clear_word
, sizeof(int), &tmp_index
);
401 printk(KERN_ERR
"nvram_clear_error_log: Failed nvram_write (%d)\n", rc
);
404 last_unread_rtas_event
= 0;
409 /* pseries_nvram_init_os_partition
411 * This sets up a partition with an "OS" signature.
413 * The general strategy is the following:
414 * 1.) If a partition with the indicated name already exists...
415 * - If it's large enough, use it.
416 * - Otherwise, recycle it and keep going.
417 * 2.) Search for a free partition that is large enough.
418 * 3.) If there's not a free partition large enough, recycle any obsolete
419 * OS partitions and try again.
420 * 4.) Will first try getting a chunk that will satisfy the requested size.
421 * 5.) If a chunk of the requested size cannot be allocated, then try finding
422 * a chunk that will satisfy the minimum needed.
424 * Returns 0 on success, else -1.
426 static int __init
pseries_nvram_init_os_partition(struct nvram_os_partition
433 p
= nvram_find_partition(part
->name
, NVRAM_SIG_OS
, &size
);
435 /* Found one but too small, remove it */
436 if (p
&& size
< part
->min_size
) {
437 pr_info("nvram: Found too small %s partition,"
438 " removing it...\n", part
->name
);
439 nvram_remove_partition(part
->name
, NVRAM_SIG_OS
, NULL
);
443 /* Create one if we didn't find */
445 p
= nvram_create_partition(part
->name
, NVRAM_SIG_OS
,
446 part
->req_size
, part
->min_size
);
448 pr_info("nvram: No room to create %s partition, "
449 "deleting any obsolete OS partitions...\n",
451 nvram_remove_partition(NULL
, NVRAM_SIG_OS
,
452 pseries_nvram_os_partitions
);
453 p
= nvram_create_partition(part
->name
, NVRAM_SIG_OS
,
454 part
->req_size
, part
->min_size
);
459 pr_err("nvram: Failed to find or create %s"
460 " partition, err %d\n", part
->name
, (int)p
);
465 part
->size
= nvram_get_partition_size(p
) - sizeof(struct err_log_info
);
471 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
472 * would logging this oops/panic overwrite an RTAS event that rtas_errd
473 * hasn't had a chance to read and process? Return 1 if so, else 0.
475 * We assume that if rtas_errd hasn't read the RTAS event in
476 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
478 static int clobbering_unread_rtas_event(void)
480 return (oops_log_partition
.index
== rtas_log_partition
.index
481 && last_unread_rtas_event
482 && get_seconds() - last_unread_rtas_event
<=
483 NVRAM_RTAS_READ_TIMEOUT
);
486 /* Derived from logfs_compress() */
487 static int nvram_compress(const void *in
, void *out
, size_t inlen
,
493 err
= zlib_deflateInit2(&stream
, COMPR_LEVEL
, Z_DEFLATED
, WINDOW_BITS
,
494 MEM_LEVEL
, Z_DEFAULT_STRATEGY
);
499 stream
.avail_in
= inlen
;
501 stream
.next_out
= out
;
502 stream
.avail_out
= outlen
;
503 stream
.total_out
= 0;
505 err
= zlib_deflate(&stream
, Z_FINISH
);
506 if (err
!= Z_STREAM_END
)
509 err
= zlib_deflateEnd(&stream
);
513 if (stream
.total_out
>= stream
.total_in
)
516 ret
= stream
.total_out
;
521 /* Compress the text from big_oops_buf into oops_buf. */
522 static int zip_oops(size_t text_len
)
524 struct oops_log_info
*oops_hdr
= (struct oops_log_info
*)oops_buf
;
525 int zipped_len
= nvram_compress(big_oops_buf
, oops_data
, text_len
,
527 if (zipped_len
< 0) {
528 pr_err("nvram: compression failed; returned %d\n", zipped_len
);
529 pr_err("nvram: logging uncompressed oops/panic report\n");
532 oops_hdr
->version
= cpu_to_be16(OOPS_HDR_VERSION
);
533 oops_hdr
->report_length
= cpu_to_be16(zipped_len
);
534 oops_hdr
->timestamp
= cpu_to_be64(get_seconds());
539 static int nvram_pstore_open(struct pstore_info
*psi
)
541 /* Reset the iterator to start reading partitions again */
547 * nvram_pstore_write - pstore write callback for nvram
548 * @type: Type of message logged
549 * @reason: reason behind dump (oops/panic)
550 * @id: identifier to indicate the write performed
551 * @part: pstore writes data to registered buffer in parts,
552 * part number will indicate the same.
553 * @count: Indicates oops count
554 * @compressed: Flag to indicate the log is compressed
555 * @size: number of bytes written to the registered buffer
556 * @psi: registered pstore_info structure
558 * Called by pstore_dump() when an oops or panic report is logged in the
560 * Returns 0 on successful write.
562 static int nvram_pstore_write(enum pstore_type_id type
,
563 enum kmsg_dump_reason reason
,
564 u64
*id
, unsigned int part
, int count
,
565 bool compressed
, size_t size
,
566 struct pstore_info
*psi
)
569 unsigned int err_type
= ERR_TYPE_KERNEL_PANIC
;
570 struct oops_log_info
*oops_hdr
= (struct oops_log_info
*) oops_buf
;
572 /* part 1 has the recent messages from printk buffer */
573 if (part
> 1 || type
!= PSTORE_TYPE_DMESG
||
574 clobbering_unread_rtas_event())
577 oops_hdr
->version
= cpu_to_be16(OOPS_HDR_VERSION
);
578 oops_hdr
->report_length
= cpu_to_be16(size
);
579 oops_hdr
->timestamp
= cpu_to_be64(get_seconds());
582 err_type
= ERR_TYPE_KERNEL_PANIC_GZ
;
584 rc
= nvram_write_os_partition(&oops_log_partition
, oops_buf
,
585 (int) (sizeof(*oops_hdr
) + size
), err_type
, count
);
595 * Reads the oops/panic report, rtas, of-config and common partition.
596 * Returns the length of the data we read from each partition.
597 * Returns 0 if we've been called before.
599 static ssize_t
nvram_pstore_read(u64
*id
, enum pstore_type_id
*type
,
600 int *count
, struct timespec
*time
, char **buf
,
601 bool *compressed
, struct pstore_info
*psi
)
603 struct oops_log_info
*oops_hdr
;
604 unsigned int err_type
, id_no
, size
= 0;
605 struct nvram_os_partition
*part
= NULL
;
612 switch (nvram_type_ids
[read_type
]) {
613 case PSTORE_TYPE_DMESG
:
614 part
= &oops_log_partition
;
615 *type
= PSTORE_TYPE_DMESG
;
617 case PSTORE_TYPE_PPC_RTAS
:
618 part
= &rtas_log_partition
;
619 *type
= PSTORE_TYPE_PPC_RTAS
;
620 time
->tv_sec
= last_rtas_event
;
623 case PSTORE_TYPE_PPC_OF
:
625 part
= &of_config_partition
;
626 *type
= PSTORE_TYPE_PPC_OF
;
627 *id
= PSTORE_TYPE_PPC_OF
;
631 case PSTORE_TYPE_PPC_COMMON
:
633 part
= &common_partition
;
634 *type
= PSTORE_TYPE_PPC_COMMON
;
635 *id
= PSTORE_TYPE_PPC_COMMON
;
643 if (!part
->os_partition
) {
644 p
= nvram_find_partition(part
->name
, sig
, &size
);
646 pr_err("nvram: Failed to find partition %s, "
647 "err %d\n", part
->name
, (int)p
);
654 buff
= kmalloc(part
->size
, GFP_KERNEL
);
659 if (nvram_read_partition(part
, buff
, part
->size
, &err_type
, &id_no
)) {
666 if (part
->os_partition
)
669 if (nvram_type_ids
[read_type
] == PSTORE_TYPE_DMESG
) {
670 size_t length
, hdr_size
;
672 oops_hdr
= (struct oops_log_info
*)buff
;
673 if (be16_to_cpu(oops_hdr
->version
) < OOPS_HDR_VERSION
) {
674 /* Old format oops header had 2-byte record size */
675 hdr_size
= sizeof(u16
);
676 length
= be16_to_cpu(oops_hdr
->version
);
680 hdr_size
= sizeof(*oops_hdr
);
681 length
= be16_to_cpu(oops_hdr
->report_length
);
682 time
->tv_sec
= be64_to_cpu(oops_hdr
->timestamp
);
685 *buf
= kmalloc(length
, GFP_KERNEL
);
688 memcpy(*buf
, buff
+ hdr_size
, length
);
691 if (err_type
== ERR_TYPE_KERNEL_PANIC_GZ
)
702 static struct pstore_info nvram_pstore_info
= {
703 .owner
= THIS_MODULE
,
705 .open
= nvram_pstore_open
,
706 .read
= nvram_pstore_read
,
707 .write
= nvram_pstore_write
,
710 static int nvram_pstore_init(void)
714 nvram_pstore_info
.buf
= oops_data
;
715 nvram_pstore_info
.bufsize
= oops_data_sz
;
717 rc
= pstore_register(&nvram_pstore_info
);
719 pr_err("nvram: pstore_register() failed, defaults to "
720 "kmsg_dump; returned %d\n", rc
);
725 static int nvram_pstore_init(void)
731 static void __init
nvram_init_oops_partition(int rtas_partition_exists
)
735 rc
= pseries_nvram_init_os_partition(&oops_log_partition
);
737 if (!rtas_partition_exists
)
739 pr_notice("nvram: Using %s partition to log both"
740 " RTAS errors and oops/panic reports\n",
741 rtas_log_partition
.name
);
742 memcpy(&oops_log_partition
, &rtas_log_partition
,
743 sizeof(rtas_log_partition
));
745 oops_buf
= kmalloc(oops_log_partition
.size
, GFP_KERNEL
);
747 pr_err("nvram: No memory for %s partition\n",
748 oops_log_partition
.name
);
751 oops_data
= oops_buf
+ sizeof(struct oops_log_info
);
752 oops_data_sz
= oops_log_partition
.size
- sizeof(struct oops_log_info
);
754 rc
= nvram_pstore_init();
760 * Figure compression (preceded by elimination of each line's <n>
761 * severity prefix) will reduce the oops/panic report to at most
762 * 45% of its original size.
764 big_oops_buf_sz
= (oops_data_sz
* 100) / 45;
765 big_oops_buf
= kmalloc(big_oops_buf_sz
, GFP_KERNEL
);
767 stream
.workspace
= kmalloc(zlib_deflate_workspacesize(
768 WINDOW_BITS
, MEM_LEVEL
), GFP_KERNEL
);
769 if (!stream
.workspace
) {
770 pr_err("nvram: No memory for compression workspace; "
771 "skipping compression of %s partition data\n",
772 oops_log_partition
.name
);
777 pr_err("No memory for uncompressed %s data; "
778 "skipping compression\n", oops_log_partition
.name
);
779 stream
.workspace
= NULL
;
782 rc
= kmsg_dump_register(&nvram_kmsg_dumper
);
784 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc
);
787 kfree(stream
.workspace
);
791 static int __init
pseries_nvram_init_log_partitions(void)
795 /* Scan nvram for partitions */
796 nvram_scan_partitions();
798 rc
= pseries_nvram_init_os_partition(&rtas_log_partition
);
799 nvram_init_oops_partition(rc
== 0);
802 machine_arch_initcall(pseries
, pseries_nvram_init_log_partitions
);
804 int __init
pSeries_nvram_init(void)
806 struct device_node
*nvram
;
807 const __be32
*nbytes_p
;
808 unsigned int proplen
;
810 nvram
= of_find_node_by_type(NULL
, "nvram");
814 nbytes_p
= of_get_property(nvram
, "#bytes", &proplen
);
815 if (nbytes_p
== NULL
|| proplen
!= sizeof(unsigned int)) {
820 nvram_size
= be32_to_cpup(nbytes_p
);
822 nvram_fetch
= rtas_token("nvram-fetch");
823 nvram_store
= rtas_token("nvram-store");
824 printk(KERN_INFO
"PPC64 nvram contains %d bytes\n", nvram_size
);
827 ppc_md
.nvram_read
= pSeries_nvram_read
;
828 ppc_md
.nvram_write
= pSeries_nvram_write
;
829 ppc_md
.nvram_size
= pSeries_nvram_get_size
;
836 * This is our kmsg_dump callback, called after an oops or panic report
837 * has been written to the printk buffer. We want to capture as much
838 * of the printk buffer as possible. First, capture as much as we can
839 * that we think will compress sufficiently to fit in the lnx,oops-log
840 * partition. If that's too much, go back and capture uncompressed text.
842 static void oops_to_nvram(struct kmsg_dumper
*dumper
,
843 enum kmsg_dump_reason reason
)
845 struct oops_log_info
*oops_hdr
= (struct oops_log_info
*)oops_buf
;
846 static unsigned int oops_count
= 0;
847 static bool panicking
= false;
848 static DEFINE_SPINLOCK(lock
);
851 unsigned int err_type
= ERR_TYPE_KERNEL_PANIC_GZ
;
855 case KMSG_DUMP_RESTART
:
857 case KMSG_DUMP_POWEROFF
:
858 /* These are almost always orderly shutdowns. */
862 case KMSG_DUMP_PANIC
:
865 case KMSG_DUMP_EMERG
:
867 /* Panic report already captured. */
871 pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
872 __FUNCTION__
, (int) reason
);
876 if (clobbering_unread_rtas_event())
879 if (!spin_trylock_irqsave(&lock
, flags
))
883 kmsg_dump_get_buffer(dumper
, false,
884 big_oops_buf
, big_oops_buf_sz
, &text_len
);
885 rc
= zip_oops(text_len
);
888 kmsg_dump_rewind(dumper
);
889 kmsg_dump_get_buffer(dumper
, false,
890 oops_data
, oops_data_sz
, &text_len
);
891 err_type
= ERR_TYPE_KERNEL_PANIC
;
892 oops_hdr
->version
= cpu_to_be16(OOPS_HDR_VERSION
);
893 oops_hdr
->report_length
= cpu_to_be16(text_len
);
894 oops_hdr
->timestamp
= cpu_to_be64(get_seconds());
897 (void) nvram_write_os_partition(&oops_log_partition
, oops_buf
,
898 (int) (sizeof(*oops_hdr
) + text_len
), err_type
,
901 spin_unlock_irqrestore(&lock
, flags
);