8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / savecore / savecore.c
blobe1d3589acc883cd0431692e5083ea2851d9a206a
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
26 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdarg.h>
32 #include <unistd.h>
33 #include <fcntl.h>
34 #include <errno.h>
35 #include <string.h>
36 #include <deflt.h>
37 #include <time.h>
38 #include <syslog.h>
39 #include <stropts.h>
40 #include <pthread.h>
41 #include <limits.h>
42 #include <atomic.h>
43 #include <libnvpair.h>
44 #include <libintl.h>
45 #include <sys/mem.h>
46 #include <sys/statvfs.h>
47 #include <sys/dumphdr.h>
48 #include <sys/dumpadm.h>
49 #include <sys/compress.h>
50 #include <sys/panic.h>
51 #include <sys/sysmacros.h>
52 #include <sys/stat.h>
53 #include <sys/resource.h>
54 #include <bzip2/bzlib.h>
55 #include <sys/fm/util.h>
56 #include <fm/libfmevent.h>
57 #include <sys/int_fmtio.h>
/* Size of the stdio buffers used for fread/fwrite streams. */
#define	FBUFSIZE	(1ULL << 20)

/* Minimum size for output buffering. */
#define	MINCOREBLKSIZE	(1ULL << 17)

/* Create this file if metrics collection is enabled in the kernel. */
#define	METRICSFILE	"METRICS.csv"
69 static char progname[9] = "savecore";
70 static char *savedir; /* savecore directory */
71 static char *dumpfile; /* source of raw crash dump */
72 static long bounds = -1; /* numeric suffix */
73 static long pagesize; /* dump pagesize */
74 static int dumpfd = -1; /* dumpfile descriptor */
75 static boolean_t have_dumpfile = B_TRUE; /* dumpfile existence */
76 static dumphdr_t corehdr, dumphdr; /* initial and terminal dumphdrs */
77 static boolean_t dump_incomplete; /* dumphdr indicates incomplete */
78 static boolean_t fm_panic; /* dump is the result of fm_panic */
79 static offset_t endoff; /* offset of end-of-dump header */
80 static int verbose; /* chatty mode */
81 static int disregard_valid_flag; /* disregard valid flag */
82 static int livedump; /* dump the current running system */
83 static int interactive; /* user invoked; no syslog */
84 static int csave; /* save dump compressed */
85 static int filemode; /* processing file, not dump device */
86 static int percent_done; /* progress indicator */
87 static int sec_done; /* progress last report time */
88 static hrtime_t startts; /* timestamp at start */
89 static volatile uint64_t saved; /* count of pages written */
90 static volatile uint64_t zpages; /* count of zero pages not written */
91 static dumpdatahdr_t datahdr; /* compression info */
92 static long coreblksize; /* preferred write size (st_blksize) */
93 static int cflag; /* run as savecore -c */
94 static int mflag; /* run as savecore -m */
/*
 * Payload information for the events we raise.  Each bit selects one
 * item; raise_event uses the mask to determine what payload to include.
 */
#define	SC_PAYLOAD_SAVEDIR	0x0001	/* Include savedir in event */
#define	SC_PAYLOAD_INSTANCE	0x0002	/* Include bounds instance number */
#define	SC_PAYLOAD_IMAGEUUID	0x0004	/* Include dump OS instance uuid */
#define	SC_PAYLOAD_CRASHTIME	0x0008	/* Include epoch crashtime */
#define	SC_PAYLOAD_PANICSTR	0x0010	/* Include panic string */
#define	SC_PAYLOAD_PANICSTACK	0x0020	/* Include panic stack */
#define	SC_PAYLOAD_FAILREASON	0x0040	/* Include failure reason */
#define	SC_PAYLOAD_DUMPCOMPLETE	0x0080	/* Include completeness indicator */
#define	SC_PAYLOAD_ISCOMPRESSED	0x0100	/* Dump is in vmdump.N form */
#define	SC_PAYLOAD_DUMPADM_EN	0x0200	/* Is dumpadm enabled or not? */
#define	SC_PAYLOAD_FM_PANIC	0x0400	/* Panic initiated by FMA */
#define	SC_PAYLOAD_JUSTCHECKING	0x0800	/* Run with -c flag? */
/*
 * Kinds of event passed to raise_event.  The sc_event[] table carries one
 * entry per enumerator, in this order.
 */
enum sc_event_type {
	SC_EVENT_DUMP_PENDING = 0,
	SC_EVENT_SAVECORE_FAILURE = 1,
	SC_EVENT_DUMP_AVAILABLE = 2
};
/*
 * Common payload: the items included in every event.  The expansion is
 * parenthesized so that it remains a single operand wherever it is used.
 */
#define	_SC_PAYLOAD_CMN \
	(SC_PAYLOAD_IMAGEUUID | \
	SC_PAYLOAD_CRASHTIME | \
	SC_PAYLOAD_PANICSTR | \
	SC_PAYLOAD_PANICSTACK | \
	SC_PAYLOAD_DUMPCOMPLETE | \
	SC_PAYLOAD_FM_PANIC | \
	SC_PAYLOAD_SAVEDIR)
131 static const struct {
132 const char *sce_subclass;
133 uint32_t sce_payload;
134 } sc_event[] = {
136 * SC_EVENT_DUMP_PENDING
139 "dump_pending_on_device",
140 _SC_PAYLOAD_CMN | SC_PAYLOAD_DUMPADM_EN |
141 SC_PAYLOAD_JUSTCHECKING
145 * SC_EVENT_SAVECORE_FAILURE
148 "savecore_failure",
149 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_FAILREASON
153 * SC_EVENT_DUMP_AVAILABLE
156 "dump_available",
157 _SC_PAYLOAD_CMN | SC_PAYLOAD_INSTANCE | SC_PAYLOAD_ISCOMPRESSED
161 static void raise_event(enum sc_event_type, char *);
163 static void
164 usage(void)
166 (void) fprintf(stderr,
167 "usage: %s [-Lvd] [-f dumpfile] [dirname]\n", progname);
168 exit(1);
/*
 * Flags accepted by logprint: syslog disposition, conditions under which
 * the message is shown, and the exit status to terminate with (if any).
 */
#define	SC_SL_NONE	0x0001	/* no syslog */
#define	SC_SL_ERR	0x0002	/* syslog if !interactive, LOG_ERR */
#define	SC_SL_WARN	0x0004	/* syslog if !interactive, LOG_WARNING */
#define	SC_IF_VERBOSE	0x0008	/* message only if -v */
#define	SC_IF_ISATTY	0x0010	/* message only if interactive */
#define	SC_EXIT_OK	0x0020	/* exit(0) */
#define	SC_EXIT_ERR	0x0040	/* exit(1) */
#define	SC_EXIT_PEND	0x0080	/* exit(2) */
#define	SC_EXIT_FM	0x0100	/* exit(3) */

/* Every flag that makes logprint terminate the process. */
#define	_SC_ALLEXIT	(SC_EXIT_OK | SC_EXIT_ERR | SC_EXIT_PEND | SC_EXIT_FM)
183 static void
184 logprint(uint32_t flags, char *message, ...)
186 va_list args;
187 char buf[1024];
188 int do_always = ((flags & (SC_IF_VERBOSE | SC_IF_ISATTY)) == 0);
189 int do_ifverb = (flags & SC_IF_VERBOSE) && verbose;
190 int do_ifisatty = (flags & SC_IF_ISATTY) && interactive;
191 int code;
192 static int logprint_raised = 0;
194 if (do_always || do_ifverb || do_ifisatty) {
195 va_start(args, message);
196 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
197 (void) vsnprintf(buf, sizeof (buf), message, args);
198 (void) fprintf(stderr, "%s: %s\n", progname, buf);
199 if (!interactive) {
200 switch (flags & (SC_SL_NONE | SC_SL_ERR | SC_SL_WARN)) {
201 case SC_SL_ERR:
202 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
203 syslog(LOG_ERR, buf);
204 break;
206 case SC_SL_WARN:
207 /*LINTED: E_SEC_PRINTF_VAR_FMT*/
208 syslog(LOG_WARNING, buf);
209 break;
211 default:
212 break;
215 va_end(args);
218 switch (flags & _SC_ALLEXIT) {
219 case 0:
220 return;
222 case SC_EXIT_OK:
223 code = 0;
224 break;
226 case SC_EXIT_PEND:
228 * Raise an ireport saying why we are exiting. Do not
229 * raise if run as savecore -m. If something in the
230 * raise_event codepath calls logprint avoid recursion.
232 if (!mflag && logprint_raised++ == 0)
233 raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
234 code = 2;
235 break;
237 case SC_EXIT_FM:
238 code = 3;
239 break;
241 case SC_EXIT_ERR:
242 default:
243 if (!mflag && logprint_raised++ == 0 && have_dumpfile)
244 raise_event(SC_EVENT_SAVECORE_FAILURE, buf);
245 code = 1;
246 break;
249 exit(code);
253 * System call / libc wrappers that exit on error.
255 static int
256 Open(const char *name, int oflags, mode_t mode)
258 int fd;
260 if ((fd = open64(name, oflags, mode)) == -1)
261 logprint(SC_SL_ERR | SC_EXIT_ERR, "open(\"%s\"): %s",
262 name, strerror(errno));
263 return (fd);
266 static void
267 Fread(void *buf, size_t size, FILE *f)
269 if (fread(buf, size, 1, f) != 1)
270 logprint(SC_SL_ERR | SC_EXIT_ERR, "fread: %s",
271 strerror(errno));
274 static void
275 Fwrite(void *buf, size_t size, FILE *f)
277 if (fwrite(buf, size, 1, f) != 1)
278 logprint(SC_SL_ERR | SC_EXIT_ERR, "fwrite: %s",
279 strerror(errno));
282 static void
283 Fseek(offset_t off, FILE *f)
285 if (fseeko64(f, off, SEEK_SET) != 0)
286 logprint(SC_SL_ERR | SC_EXIT_ERR, "fseeko64: %s",
287 strerror(errno));
290 typedef struct stat64 Stat_t;
292 static void
293 Fstat(int fd, Stat_t *sb, const char *fname)
295 if (fstat64(fd, sb) != 0)
296 logprint(SC_SL_ERR | SC_EXIT_ERR, "fstat(\"%s\"): %s", fname,
297 strerror(errno));
300 static void
301 Stat(const char *fname, Stat_t *sb)
303 if (stat64(fname, sb) != 0) {
304 have_dumpfile = B_FALSE;
305 logprint(SC_SL_ERR | SC_EXIT_ERR, "failed to get status "
306 "of file %s", fname);
310 static void
311 Pread(int fd, void *buf, size_t size, offset_t off)
313 ssize_t sz = pread64(fd, buf, size, off);
315 if (sz < 0)
316 logprint(SC_SL_ERR | SC_EXIT_ERR,
317 "pread: %s", strerror(errno));
318 else if (sz != size)
319 logprint(SC_SL_ERR | SC_EXIT_ERR,
320 "pread: size %ld != %ld", sz, size);
323 static void
324 Pwrite(int fd, void *buf, size_t size, off64_t off)
326 if (pwrite64(fd, buf, size, off) != size)
327 logprint(SC_SL_ERR | SC_EXIT_ERR, "pwrite: %s",
328 strerror(errno));
331 static void *
332 Zalloc(size_t size)
334 void *buf;
336 if ((buf = calloc(size, 1)) == NULL)
337 logprint(SC_SL_ERR | SC_EXIT_ERR, "calloc: %s",
338 strerror(errno));
339 return (buf);
/*
 * Read a decimal number from the start of `filename'.
 *
 * Returns the number read, or `default_value' when the file cannot be
 * opened, no number can be parsed, or the number is negative.
 */
static long
read_number_from_file(const char *filename, long default_value)
{
	FILE *stream = fopen(filename, "r");
	long parsed = -1;

	if (stream == NULL)
		return (default_value);

	/* on a failed conversion `parsed' keeps its negative sentinel */
	(void) fscanf(stream, "%ld", &parsed);
	(void) fclose(stream);

	if (parsed < 0)
		return (default_value);
	return (parsed);
}
355 static void
356 read_dumphdr(void)
358 if (filemode)
359 dumpfd = Open(dumpfile, O_RDONLY, 0644);
360 else
361 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
362 endoff = llseek(dumpfd, -DUMP_OFFSET, SEEK_END) & -DUMP_OFFSET;
363 Pread(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
364 Pread(dumpfd, &datahdr, sizeof (datahdr), endoff + sizeof (dumphdr));
366 pagesize = dumphdr.dump_pagesize;
368 if (dumphdr.dump_magic != DUMP_MAGIC)
369 logprint(SC_SL_NONE | SC_EXIT_PEND, "bad magic number %x",
370 dumphdr.dump_magic);
372 if ((dumphdr.dump_flags & DF_VALID) == 0 && !disregard_valid_flag)
373 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_OK,
374 "dump already processed");
376 if (dumphdr.dump_version != DUMP_VERSION)
377 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
378 "dump version (%d) != %s version (%d)",
379 dumphdr.dump_version, progname, DUMP_VERSION);
381 if (dumphdr.dump_wordsize != DUMP_WORDSIZE)
382 logprint(SC_SL_NONE | SC_EXIT_PEND,
383 "dump is from %u-bit kernel - cannot save on %u-bit kernel",
384 dumphdr.dump_wordsize, DUMP_WORDSIZE);
386 if (datahdr.dump_datahdr_magic == DUMP_DATAHDR_MAGIC) {
387 if (datahdr.dump_datahdr_version != DUMP_DATAHDR_VERSION)
388 logprint(SC_SL_NONE | SC_IF_VERBOSE | SC_EXIT_PEND,
389 "dump data version (%d) != %s data version (%d)",
390 datahdr.dump_datahdr_version, progname,
391 DUMP_DATAHDR_VERSION);
392 } else {
393 (void) memset(&datahdr, 0, sizeof (datahdr));
394 datahdr.dump_maxcsize = pagesize;
398 * Read the initial header, clear the valid bits, and compare headers.
399 * The main header may have been overwritten by swapping if we're
400 * using a swap partition as the dump device, in which case we bail.
402 Pread(dumpfd, &corehdr, sizeof (dumphdr_t), dumphdr.dump_start);
404 corehdr.dump_flags &= ~DF_VALID;
405 dumphdr.dump_flags &= ~DF_VALID;
407 if (memcmp(&corehdr, &dumphdr, sizeof (dumphdr_t)) != 0) {
409 * Clear valid bit so we don't complain on every invocation.
411 if (!filemode)
412 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
413 logprint(SC_SL_ERR | SC_EXIT_ERR,
414 "initial dump header corrupt");
418 static void
419 check_space(int csave)
421 struct statvfs fsb;
422 int64_t spacefree, dumpsize, minfree, datasize;
424 if (statvfs(".", &fsb) < 0)
425 logprint(SC_SL_ERR | SC_EXIT_ERR, "statvfs: %s",
426 strerror(errno));
428 dumpsize = dumphdr.dump_data - dumphdr.dump_start;
429 datasize = dumphdr.dump_npages * pagesize;
430 if (!csave)
431 dumpsize += datasize;
432 else
433 dumpsize += datahdr.dump_data_csize;
435 spacefree = (int64_t)fsb.f_bavail * fsb.f_frsize;
436 minfree = 1024LL * read_number_from_file("minfree", 1024);
437 if (spacefree < minfree + dumpsize) {
438 logprint(SC_SL_ERR | SC_EXIT_ERR,
439 "not enough space in %s (%lld MB avail, %lld MB needed)",
440 savedir, spacefree >> 20, (minfree + dumpsize) >> 20);
444 static void
445 build_dump_map(int corefd, const pfn_t *pfn_table)
447 long i;
448 static long misses = 0;
449 size_t dump_mapsize = (corehdr.dump_hashmask + 1) * sizeof (dump_map_t);
450 mem_vtop_t vtop;
451 dump_map_t *dmp = Zalloc(dump_mapsize);
452 char *inbuf = Zalloc(FBUFSIZE);
453 FILE *in = fdopen(dup(dumpfd), "rb");
455 (void) setvbuf(in, inbuf, _IOFBF, FBUFSIZE);
456 Fseek(dumphdr.dump_map, in);
458 corehdr.dump_data = corehdr.dump_map + roundup(dump_mapsize, pagesize);
460 for (i = 0; i < corehdr.dump_nvtop; i++) {
461 long first = 0;
462 long last = corehdr.dump_npages - 1;
463 long middle = 0;
464 pfn_t pfn = 0;
465 uintptr_t h;
467 Fread(&vtop, sizeof (mem_vtop_t), in);
468 while (last >= first) {
469 middle = (first + last) / 2;
470 pfn = pfn_table[middle];
471 if (pfn == vtop.m_pfn)
472 break;
473 if (pfn < vtop.m_pfn)
474 first = middle + 1;
475 else
476 last = middle - 1;
478 if (pfn != vtop.m_pfn) {
479 if (++misses <= 10)
480 (void) fprintf(stderr,
481 "pfn %ld not found for as=%p, va=%p\n",
482 vtop.m_pfn, (void *)vtop.m_as, vtop.m_va);
483 continue;
486 dmp[i].dm_as = vtop.m_as;
487 dmp[i].dm_va = (uintptr_t)vtop.m_va;
488 dmp[i].dm_data = corehdr.dump_data +
489 ((uint64_t)middle << corehdr.dump_pageshift);
491 h = DUMP_HASH(&corehdr, dmp[i].dm_as, dmp[i].dm_va);
492 dmp[i].dm_next = dmp[h].dm_first;
493 dmp[h].dm_first = corehdr.dump_map + i * sizeof (dump_map_t);
496 Pwrite(corefd, dmp, dump_mapsize, corehdr.dump_map);
497 free(dmp);
498 (void) fclose(in);
499 free(inbuf);
503 * Copy whole sections of the dump device to the file.
505 static void
506 Copy(offset_t dumpoff, len_t nb, offset_t *offp, int fd, char *buf,
507 size_t sz)
509 size_t nr;
510 offset_t off = *offp;
512 while (nb > 0) {
513 nr = sz < nb ? sz : (size_t)nb;
514 Pread(dumpfd, buf, nr, dumpoff);
515 Pwrite(fd, buf, nr, off);
516 off += nr;
517 dumpoff += nr;
518 nb -= nr;
520 *offp = off;
524 * Copy pages when the dump data header is missing.
525 * This supports older kernels with latest savecore.
527 static void
528 CopyPages(offset_t *offp, int fd, char *buf, size_t sz)
530 uint32_t csize;
531 FILE *in = fdopen(dup(dumpfd), "rb");
532 FILE *out = fdopen(dup(fd), "wb");
533 char *cbuf = Zalloc(pagesize);
534 char *outbuf = Zalloc(FBUFSIZE);
535 pgcnt_t np = dumphdr.dump_npages;
537 (void) setvbuf(out, outbuf, _IOFBF, FBUFSIZE);
538 (void) setvbuf(in, buf, _IOFBF, sz);
539 Fseek(dumphdr.dump_data, in);
541 Fseek(*offp, out);
542 while (np > 0) {
543 Fread(&csize, sizeof (uint32_t), in);
544 Fwrite(&csize, sizeof (uint32_t), out);
545 *offp += sizeof (uint32_t);
546 if (csize > pagesize || csize == 0) {
547 logprint(SC_SL_ERR,
548 "CopyPages: page %lu csize %d (0x%x) pagesize %d",
549 dumphdr.dump_npages - np, csize, csize,
550 pagesize);
551 break;
553 Fread(cbuf, csize, in);
554 Fwrite(cbuf, csize, out);
555 *offp += csize;
556 np--;
558 (void) fclose(in);
559 (void) fclose(out);
560 free(outbuf);
561 free(buf);
565 * Concatenate dump contents into a new file.
566 * Update corehdr with new offsets.
568 static void
569 copy_crashfile(const char *corefile)
571 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
572 size_t bufsz = FBUFSIZE;
573 char *inbuf = Zalloc(bufsz);
574 offset_t coreoff;
575 size_t nb;
577 logprint(SC_SL_ERR | SC_IF_VERBOSE,
578 "Copying %s to %s/%s\n", dumpfile, savedir, corefile);
581 * This dump file is still compressed
583 corehdr.dump_flags |= DF_COMPRESSED | DF_VALID;
586 * Leave room for corehdr, it is updated and written last
588 corehdr.dump_start = 0;
589 coreoff = sizeof (corehdr);
592 * Read in the compressed symbol table, copy it to corefile.
594 coreoff = roundup(coreoff, pagesize);
595 corehdr.dump_ksyms = coreoff;
596 Copy(dumphdr.dump_ksyms, dumphdr.dump_ksyms_csize, &coreoff, corefd,
597 inbuf, bufsz);
600 * Save the pfn table.
602 coreoff = roundup(coreoff, pagesize);
603 corehdr.dump_pfn = coreoff;
604 Copy(dumphdr.dump_pfn, dumphdr.dump_npages * sizeof (pfn_t), &coreoff,
605 corefd, inbuf, bufsz);
608 * Save the dump map.
610 coreoff = roundup(coreoff, pagesize);
611 corehdr.dump_map = coreoff;
612 Copy(dumphdr.dump_map, dumphdr.dump_nvtop * sizeof (mem_vtop_t),
613 &coreoff, corefd, inbuf, bufsz);
616 * Save the data pages.
618 coreoff = roundup(coreoff, pagesize);
619 corehdr.dump_data = coreoff;
620 if (datahdr.dump_data_csize != 0)
621 Copy(dumphdr.dump_data, datahdr.dump_data_csize, &coreoff,
622 corefd, inbuf, bufsz);
623 else
624 CopyPages(&coreoff, corefd, inbuf, bufsz);
627 * Now write the modified dump header to front and end of the copy.
628 * Make it look like a valid dump device.
630 * From dumphdr.h: Two headers are written out: one at the
631 * beginning of the dump, and the other at the very end of the
632 * dump device. The terminal header is at a known location
633 * (end of device) so we can always find it.
635 * Pad with zeros to each DUMP_OFFSET boundary.
637 (void) memset(inbuf, 0, DUMP_OFFSET);
639 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
640 if (nb > 0) {
641 Pwrite(corefd, inbuf, nb, coreoff);
642 coreoff += nb;
645 Pwrite(corefd, &corehdr, sizeof (corehdr), coreoff);
646 coreoff += sizeof (corehdr);
648 Pwrite(corefd, &datahdr, sizeof (datahdr), coreoff);
649 coreoff += sizeof (datahdr);
651 nb = DUMP_OFFSET - (coreoff & (DUMP_OFFSET - 1));
652 if (nb > 0) {
653 Pwrite(corefd, inbuf, nb, coreoff);
656 free(inbuf);
657 Pwrite(corefd, &corehdr, sizeof (corehdr), corehdr.dump_start);
660 * Write out the modified dump header to the dump device.
661 * The dump device has been processed, so DF_VALID is clear.
663 if (!filemode)
664 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
666 (void) close(corefd);
670 * compressed streams
672 typedef struct blockhdr blockhdr_t;
673 typedef struct block block_t;
675 struct blockhdr {
676 block_t *head;
677 block_t *tail;
680 struct block {
681 block_t *next;
682 char *block;
683 int size;
686 typedef enum streamstate {
687 STREAMSTART,
688 STREAMPAGES
689 } streamstate_t;
691 typedef struct stream {
692 streamstate_t state;
693 int init;
694 int tag;
695 int bound;
696 int nout;
697 char *blkbuf;
698 blockhdr_t blocks;
699 pgcnt_t pagenum;
700 pgcnt_t curpage;
701 pgcnt_t npages;
702 pgcnt_t done;
703 bz_stream strm;
704 dumpcsize_t sc;
705 dumpstreamhdr_t sh;
706 } stream_t;
708 static stream_t *streams;
709 static stream_t *endstreams;
711 const int cs = sizeof (dumpcsize_t);
713 typedef struct tinfo {
714 pthread_t tid;
715 int corefd;
716 } tinfo_t;
718 static int threads_stop;
719 static int threads_active;
720 static tinfo_t *tinfo;
721 static tinfo_t *endtinfo;
723 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
724 static pthread_cond_t cvfree = PTHREAD_COND_INITIALIZER;
725 static pthread_cond_t cvwork = PTHREAD_COND_INITIALIZER;
726 static pthread_cond_t cvbarrier = PTHREAD_COND_INITIALIZER;
728 static blockhdr_t freeblocks;
730 static void
731 enqt(blockhdr_t *h, block_t *b)
733 b->next = NULL;
734 if (h->tail == NULL)
735 h->head = b;
736 else
737 h->tail->next = b;
738 h->tail = b;
741 static block_t *
742 deqh(blockhdr_t *h)
744 block_t *b = h->head;
746 if (b != NULL) {
747 h->head = b->next;
748 if (h->head == NULL)
749 h->tail = NULL;
751 return (b);
754 static void *runstreams(void *arg);
756 static void
757 initstreams(int corefd, int nstreams, int maxcsize)
759 int nthreads;
760 int nblocks;
761 int i;
762 block_t *b;
763 tinfo_t *t;
765 nthreads = sysconf(_SC_NPROCESSORS_ONLN);
766 if (nstreams < nthreads)
767 nthreads = nstreams;
768 if (nthreads < 1)
769 nthreads = 1;
770 nblocks = nthreads * 2;
772 tinfo = Zalloc(nthreads * sizeof (tinfo_t));
773 endtinfo = &tinfo[nthreads];
775 /* init streams */
776 streams = Zalloc(nstreams * sizeof (stream_t));
777 endstreams = &streams[nstreams];
779 /* init stream block buffers */
780 for (i = 0; i < nblocks; i++) {
781 b = Zalloc(sizeof (block_t));
782 b->block = Zalloc(maxcsize);
783 enqt(&freeblocks, b);
786 /* init worker threads */
787 (void) pthread_mutex_lock(&lock);
788 threads_active = 1;
789 threads_stop = 0;
790 for (t = tinfo; t != endtinfo; t++) {
791 t->corefd = dup(corefd);
792 if (t->corefd < 0) {
793 nthreads = t - tinfo;
794 endtinfo = t;
795 break;
797 if (pthread_create(&t->tid, NULL, runstreams, t) != 0)
798 logprint(SC_SL_ERR | SC_EXIT_ERR, "pthread_create: %s",
799 strerror(errno));
801 (void) pthread_mutex_unlock(&lock);
804 static void
805 sbarrier()
807 stream_t *s;
809 (void) pthread_mutex_lock(&lock);
810 for (s = streams; s != endstreams; s++) {
811 while (s->bound || s->blocks.head != NULL)
812 (void) pthread_cond_wait(&cvbarrier, &lock);
814 (void) pthread_mutex_unlock(&lock);
817 static void
818 stopstreams()
820 tinfo_t *t;
822 if (threads_active) {
823 sbarrier();
824 (void) pthread_mutex_lock(&lock);
825 threads_stop = 1;
826 (void) pthread_cond_signal(&cvwork);
827 (void) pthread_mutex_unlock(&lock);
828 for (t = tinfo; t != endtinfo; t++)
829 (void) pthread_join(t->tid, NULL);
830 free(tinfo);
831 tinfo = NULL;
832 threads_active = 0;
836 static block_t *
837 getfreeblock()
839 block_t *b;
841 (void) pthread_mutex_lock(&lock);
842 while ((b = deqh(&freeblocks)) == NULL)
843 (void) pthread_cond_wait(&cvfree, &lock);
844 (void) pthread_mutex_unlock(&lock);
845 return (b);
/*
 * Byte <-> page conversions using the dump's page shift, and the core
 * file offset of a data page given its page number.
 */
#define	BTOP(b)		((b) >> dumphdr.dump_pageshift)
#define	PTOB(p)		((p) << dumphdr.dump_pageshift)
#define	DATAOFF(p)	(corehdr.dump_data + PTOB(p))
853 /* check for coreblksize boundary */
854 static int
855 isblkbnd(pgcnt_t pgnum)
857 return (P2PHASE(DATAOFF(pgnum), coreblksize) == 0);
860 static int
861 iszpage(char *buf)
863 size_t sz;
864 uint64_t *pl;
866 /*LINTED:E_BAD_PTR_CAST_ALIGN*/
867 pl = (uint64_t *)(buf);
868 for (sz = 0; sz < pagesize; sz += sizeof (*pl))
869 if (*pl++ != 0)
870 return (0);
871 return (1);
874 volatile uint_t *hist;
876 /* write pages to the core file */
877 static void
878 putpage(int corefd, char *buf, pgcnt_t pgnum, pgcnt_t np)
880 atomic_inc_uint(&hist[np]);
881 if (np > 0)
882 Pwrite(corefd, buf, PTOB(np), DATAOFF(pgnum));
886 * Process one lzjb block.
887 * No object (stream header or page) will be split over a block boundary.
889 static void
890 lzjbblock(int corefd, stream_t *s, char *block, size_t blocksz)
892 int in = 0;
893 int csize;
894 int doflush;
895 char *out;
896 size_t dsize;
897 dumpcsize_t sc;
898 dumpstreamhdr_t sh;
900 if (!s->init) {
901 s->init = 1;
902 if (s->blkbuf == NULL)
903 s->blkbuf = Zalloc(coreblksize);
904 s->state = STREAMSTART;
906 while (in < blocksz) {
907 switch (s->state) {
908 case STREAMSTART:
909 (void) memcpy(&sh, block + in, sizeof (sh));
910 in += sizeof (sh);
911 if (strcmp(DUMP_STREAM_MAGIC, sh.stream_magic) != 0)
912 logprint(SC_SL_ERR | SC_EXIT_ERR,
913 "LZJB STREAMSTART: bad stream header");
914 if (sh.stream_npages > datahdr.dump_maxrange)
915 logprint(SC_SL_ERR | SC_EXIT_ERR,
916 "LZJB STREAMSTART: bad range: %d > %d",
917 sh.stream_npages, datahdr.dump_maxrange);
918 s->pagenum = sh.stream_pagenum;
919 s->npages = sh.stream_npages;
920 s->curpage = s->pagenum;
921 s->nout = 0;
922 s->done = 0;
923 s->state = STREAMPAGES;
924 break;
925 case STREAMPAGES:
926 (void) memcpy(&sc, block + in, cs);
927 in += cs;
928 csize = DUMP_GET_CSIZE(sc);
929 if (csize > pagesize)
930 logprint(SC_SL_ERR | SC_EXIT_ERR,
931 "LZJB STREAMPAGES: bad csize=%d", csize);
933 out = s->blkbuf + PTOB(s->nout);
934 dsize = decompress(block + in, out, csize, pagesize);
936 if (dsize != pagesize)
937 logprint(SC_SL_ERR | SC_EXIT_ERR,
938 "LZJB STREAMPAGES: dsize %d != pagesize %d",
939 dsize, pagesize);
941 in += csize;
942 atomic_inc_64(&saved);
944 doflush = 0;
945 if (s->nout == 0 && iszpage(out)) {
946 doflush = 1;
947 atomic_inc_64(&zpages);
948 } else if (++s->nout >= BTOP(coreblksize) ||
949 isblkbnd(s->curpage + s->nout)) {
950 doflush = 1;
952 if (++s->done >= s->npages) {
953 s->state = STREAMSTART;
954 doflush = 1;
956 if (doflush) {
957 putpage(corefd, s->blkbuf, s->curpage, s->nout);
958 s->nout = 0;
959 s->curpage = s->pagenum + s->done;
961 break;
966 /* bzlib library reports errors with this callback */
967 void
968 bz_internal_error(int errcode)
970 logprint(SC_SL_ERR | SC_EXIT_ERR, "bz_internal_error: err %s\n",
971 BZ2_bzErrorString(errcode));
975 * Return one object in the stream.
977 * An object (stream header or page) will likely span an input block
978 * of compression data. Return non-zero when an entire object has been
979 * retrieved from the stream.
981 static int
982 bz2decompress(stream_t *s, void *buf, size_t size)
984 int rc;
986 if (s->strm.avail_out == 0) {
987 s->strm.next_out = buf;
988 s->strm.avail_out = size;
990 while (s->strm.avail_in > 0) {
991 rc = BZ2_bzDecompress(&s->strm);
992 if (rc == BZ_STREAM_END) {
993 rc = BZ2_bzDecompressReset(&s->strm);
994 if (rc != BZ_OK)
995 logprint(SC_SL_ERR | SC_EXIT_ERR,
996 "BZ2_bzDecompressReset: %s",
997 BZ2_bzErrorString(rc));
998 continue;
1001 if (s->strm.avail_out == 0)
1002 break;
1004 return (s->strm.avail_out == 0);
1008 * Process one bzip2 block.
1009 * The interface is documented here:
1010 * http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html
1012 static void
1013 bz2block(int corefd, stream_t *s, char *block, size_t blocksz)
1015 int rc = 0;
1016 int doflush;
1017 char *out;
1019 if (!s->init) {
1020 s->init = 1;
1021 rc = BZ2_bzDecompressInit(&s->strm, 0, 0);
1022 if (rc != BZ_OK)
1023 logprint(SC_SL_ERR | SC_EXIT_ERR,
1024 "BZ2_bzDecompressInit: %s", BZ2_bzErrorString(rc));
1025 if (s->blkbuf == NULL)
1026 s->blkbuf = Zalloc(coreblksize);
1027 s->strm.avail_out = 0;
1028 s->state = STREAMSTART;
1030 s->strm.next_in = block;
1031 s->strm.avail_in = blocksz;
1033 while (s->strm.avail_in > 0) {
1034 switch (s->state) {
1035 case STREAMSTART:
1036 if (!bz2decompress(s, &s->sh, sizeof (s->sh)))
1037 return;
1038 if (strcmp(DUMP_STREAM_MAGIC, s->sh.stream_magic) != 0)
1039 logprint(SC_SL_ERR | SC_EXIT_ERR,
1040 "BZ2 STREAMSTART: bad stream header");
1041 if (s->sh.stream_npages > datahdr.dump_maxrange)
1042 logprint(SC_SL_ERR | SC_EXIT_ERR,
1043 "BZ2 STREAMSTART: bad range: %d > %d",
1044 s->sh.stream_npages, datahdr.dump_maxrange);
1045 s->pagenum = s->sh.stream_pagenum;
1046 s->npages = s->sh.stream_npages;
1047 s->curpage = s->pagenum;
1048 s->nout = 0;
1049 s->done = 0;
1050 s->state = STREAMPAGES;
1051 break;
1052 case STREAMPAGES:
1053 out = s->blkbuf + PTOB(s->nout);
1054 if (!bz2decompress(s, out, pagesize))
1055 return;
1057 atomic_inc_64(&saved);
1059 doflush = 0;
1060 if (s->nout == 0 && iszpage(out)) {
1061 doflush = 1;
1062 atomic_inc_64(&zpages);
1063 } else if (++s->nout >= BTOP(coreblksize) ||
1064 isblkbnd(s->curpage + s->nout)) {
1065 doflush = 1;
1067 if (++s->done >= s->npages) {
1068 s->state = STREAMSTART;
1069 doflush = 1;
1071 if (doflush) {
1072 putpage(corefd, s->blkbuf, s->curpage, s->nout);
1073 s->nout = 0;
1074 s->curpage = s->pagenum + s->done;
1076 break;
1081 /* report progress */
1082 static void
1083 report_progress()
1085 int sec, percent;
1087 if (!interactive)
1088 return;
1090 percent = saved * 100LL / corehdr.dump_npages;
1091 sec = (gethrtime() - startts) / NANOSEC;
1092 if (percent > percent_done || sec > sec_done) {
1093 (void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
1094 percent);
1095 (void) fflush(stdout);
1096 sec_done = sec;
1097 percent_done = percent;
1101 /* thread body */
1102 static void *
1103 runstreams(void *arg)
1105 tinfo_t *t = arg;
1106 stream_t *s;
1107 block_t *b;
1108 int bound;
1110 (void) pthread_mutex_lock(&lock);
1111 while (!threads_stop) {
1112 bound = 0;
1113 for (s = streams; s != endstreams; s++) {
1114 if (s->bound || s->blocks.head == NULL)
1115 continue;
1116 s->bound = 1;
1117 bound = 1;
1118 (void) pthread_cond_signal(&cvwork);
1119 while (s->blocks.head != NULL) {
1120 b = deqh(&s->blocks);
1121 (void) pthread_mutex_unlock(&lock);
1123 if (datahdr.dump_clevel < DUMP_CLEVEL_BZIP2)
1124 lzjbblock(t->corefd, s, b->block,
1125 b->size);
1126 else
1127 bz2block(t->corefd, s, b->block,
1128 b->size);
1130 (void) pthread_mutex_lock(&lock);
1131 enqt(&freeblocks, b);
1132 (void) pthread_cond_signal(&cvfree);
1134 report_progress();
1136 s->bound = 0;
1137 (void) pthread_cond_signal(&cvbarrier);
1139 if (!bound && !threads_stop)
1140 (void) pthread_cond_wait(&cvwork, &lock);
1142 (void) close(t->corefd);
1143 (void) pthread_cond_signal(&cvwork);
1144 (void) pthread_mutex_unlock(&lock);
1145 return (arg);
1149 * Process compressed pages.
1151 * The old format, now called single-threaded lzjb, is a 32-bit size
1152 * word followed by 'size' bytes of lzjb compression data for one
1153 * page. The new format extends this by storing a 12-bit "tag" in the
1154 * upper bits of the size word. When the size word is pagesize or
1155 * less, it is assumed to be one lzjb page. When the size word is
1156 * greater than pagesize, it is assumed to be a "stream block",
1157 * belonging to up to 4095 streams. In practice, the number of streams
1158 * is set to one less than the number of CPUs running at crash
1159 * time. One CPU processes the crash dump, the remaining CPUs
1160 * separately process groups of data pages.
1162 * savecore creates a thread per stream, but never more threads than
1163 * the number of CPUs running savecore. This is because savecore can
1164 * be processing a crash file from a remote machine, which may have
1165 * more CPUs.
1167 * When the kernel uses parallel lzjb or parallel bzip2, we expect a
1168 * series of 128KB blocks of compression data. In this case, each
1169 * block has a "tag", in the range 1-4095. Each block is handed off to
1170 * to the threads running "runstreams". The dump format is either lzjb
1171 * or bzip2, never a mixture. These threads, in turn, process the
1172 * compression data for groups of pages. Groups of pages are delimited
1173 * by a "stream header", which indicates a starting pfn and number of
1174 * pages. When a stream block has been read, the condition variable
1175 * "cvwork" is signalled, which causes one of the available threads to
1176 * wake up and process the stream.
1178 * In the parallel case there will be stream blocks encoding all data
1179 * pages. The stream of blocks is terminated by a zero size
1180 * word. There can be a few lzjb pages tacked on the end, depending on
1181 * the architecture. The sbarrier function ensures that all stream
1182 * blocks have been processed so that the page number for the few
1183 * single pages at the end can be known.
1185 static void
1186 decompress_pages(int corefd)
1188 char *cpage = NULL;
1189 char *dpage = NULL;
1190 char *out;
1191 pgcnt_t curpage = 0;
1192 block_t *b;
1193 FILE *dumpf;
1194 FILE *tracef = NULL;
1195 stream_t *s;
1196 size_t dsize;
1197 size_t insz = FBUFSIZE;
1198 char *inbuf = Zalloc(insz);
1199 uint32_t csize;
1200 dumpcsize_t dcsize;
1201 int nstreams = datahdr.dump_nstreams;
1202 int maxcsize = datahdr.dump_maxcsize;
1203 int nout = 0, tag, doflush;
1205 dumpf = fdopen(dup(dumpfd), "rb");
1206 if (dumpf == NULL)
1207 logprint(SC_SL_ERR | SC_EXIT_ERR, "fdopen: %s",
1208 strerror(errno));
1210 (void) setvbuf(dumpf, inbuf, _IOFBF, insz);
1211 Fseek(dumphdr.dump_data, dumpf);
1213 /*LINTED: E_CONSTANT_CONDITION*/
1214 while (1) {
1217 * The csize word delimits stream blocks.
1218 * See dumphdr.h for a description.
1220 Fread(&dcsize, sizeof (dcsize), dumpf);
1222 tag = DUMP_GET_TAG(dcsize);
1223 csize = DUMP_GET_CSIZE(dcsize);
1225 if (tag != 0) { /* a stream block */
1227 if (nstreams == 0)
1228 logprint(SC_SL_ERR | SC_EXIT_ERR,
1229 "starting data header is missing");
1231 if (tag > nstreams)
1232 logprint(SC_SL_ERR | SC_EXIT_ERR,
1233 "stream tag %d not in range 1..%d",
1234 tag, nstreams);
1236 if (csize > maxcsize)
1237 logprint(SC_SL_ERR | SC_EXIT_ERR,
1238 "block size 0x%x > max csize 0x%x",
1239 csize, maxcsize);
1241 if (streams == NULL)
1242 initstreams(corefd, nstreams, maxcsize);
1243 s = &streams[tag - 1];
1244 s->tag = tag;
1246 b = getfreeblock();
1247 b->size = csize;
1248 Fread(b->block, csize, dumpf);
1250 (void) pthread_mutex_lock(&lock);
1251 enqt(&s->blocks, b);
1252 if (!s->bound)
1253 (void) pthread_cond_signal(&cvwork);
1254 (void) pthread_mutex_unlock(&lock);
1256 } else if (csize > 0) { /* one lzjb page */
1258 if (csize > pagesize)
1259 logprint(SC_SL_ERR | SC_EXIT_ERR,
1260 "csize 0x%x > pagesize 0x%x",
1261 csize, pagesize);
1263 if (cpage == NULL)
1264 cpage = Zalloc(pagesize);
1265 if (dpage == NULL) {
1266 dpage = Zalloc(coreblksize);
1267 nout = 0;
1270 Fread(cpage, csize, dumpf);
1272 out = dpage + PTOB(nout);
1273 dsize = decompress(cpage, out, csize, pagesize);
1275 if (dsize != pagesize)
1276 logprint(SC_SL_ERR | SC_EXIT_ERR,
1277 "dsize 0x%x != pagesize 0x%x",
1278 dsize, pagesize);
1281 * wait for streams to flush so that 'saved' is correct
1283 if (threads_active)
1284 sbarrier();
1286 doflush = 0;
1287 if (nout == 0)
1288 curpage = saved;
1290 atomic_inc_64(&saved);
1292 if (nout == 0 && iszpage(dpage)) {
1293 doflush = 1;
1294 atomic_inc_64(&zpages);
1295 } else if (++nout >= BTOP(coreblksize) ||
1296 isblkbnd(curpage + nout) ||
1297 saved >= dumphdr.dump_npages) {
1298 doflush = 1;
1301 if (doflush) {
1302 putpage(corefd, dpage, curpage, nout);
1303 nout = 0;
1306 report_progress();
1309 * Non-streams lzjb does not use blocks. Stop
1310 * here if all the pages have been decompressed.
1312 if (saved >= dumphdr.dump_npages)
1313 break;
1315 } else {
1316 break; /* end of data */
1320 stopstreams();
1321 if (tracef != NULL)
1322 (void) fclose(tracef);
1323 (void) fclose(dumpf);
1324 if (inbuf)
1325 free(inbuf);
1326 if (cpage)
1327 free(cpage);
1328 if (dpage)
1329 free(dpage);
1330 if (streams)
1331 free(streams);
1334 static void
1335 build_corefile(const char *namelist, const char *corefile)
1337 size_t pfn_table_size = dumphdr.dump_npages * sizeof (pfn_t);
1338 size_t ksyms_size = dumphdr.dump_ksyms_size;
1339 size_t ksyms_csize = dumphdr.dump_ksyms_csize;
1340 pfn_t *pfn_table;
1341 char *ksyms_base = Zalloc(ksyms_size);
1342 char *ksyms_cbase = Zalloc(ksyms_csize);
1343 size_t ksyms_dsize;
1344 Stat_t st;
1345 int corefd = Open(corefile, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1346 int namefd = Open(namelist, O_WRONLY | O_CREAT | O_TRUNC, 0644);
1348 (void) printf("Constructing namelist %s/%s\n", savedir, namelist);
1351 * Determine the optimum write size for the core file
1353 Fstat(corefd, &st, corefile);
1355 if (verbose > 1)
1356 (void) printf("%s: %ld block size\n", corefile,
1357 (long)st.st_blksize);
1358 coreblksize = st.st_blksize;
1359 if (coreblksize < MINCOREBLKSIZE || !ISP2(coreblksize))
1360 coreblksize = MINCOREBLKSIZE;
1362 hist = Zalloc((sizeof (uint64_t) * BTOP(coreblksize)) + 1);
1365 * This dump file is now uncompressed
1367 corehdr.dump_flags &= ~DF_COMPRESSED;
1370 * Read in the compressed symbol table, copy it to corefile,
1371 * decompress it, and write the result to namelist.
1373 corehdr.dump_ksyms = pagesize;
1374 Pread(dumpfd, ksyms_cbase, ksyms_csize, dumphdr.dump_ksyms);
1375 Pwrite(corefd, ksyms_cbase, ksyms_csize, corehdr.dump_ksyms);
1377 ksyms_dsize = decompress(ksyms_cbase, ksyms_base, ksyms_csize,
1378 ksyms_size);
1379 if (ksyms_dsize != ksyms_size)
1380 logprint(SC_SL_WARN,
1381 "bad data in symbol table, %lu of %lu bytes saved",
1382 ksyms_dsize, ksyms_size);
1384 Pwrite(namefd, ksyms_base, ksyms_size, 0);
1385 (void) close(namefd);
1386 free(ksyms_cbase);
1387 free(ksyms_base);
1389 (void) printf("Constructing corefile %s/%s\n", savedir, corefile);
1392 * Read in and write out the pfn table.
1394 pfn_table = Zalloc(pfn_table_size);
1395 corehdr.dump_pfn = corehdr.dump_ksyms + roundup(ksyms_size, pagesize);
1396 Pread(dumpfd, pfn_table, pfn_table_size, dumphdr.dump_pfn);
1397 Pwrite(corefd, pfn_table, pfn_table_size, corehdr.dump_pfn);
1400 * Convert the raw translation data into a hashed dump map.
1402 corehdr.dump_map = corehdr.dump_pfn + roundup(pfn_table_size, pagesize);
1403 build_dump_map(corefd, pfn_table);
1404 free(pfn_table);
1407 * Decompress the pages
1409 decompress_pages(corefd);
1410 (void) printf(": %ld of %ld pages saved\n", (pgcnt_t)saved,
1411 dumphdr.dump_npages);
1413 if (verbose)
1414 (void) printf("%ld (%ld%%) zero pages were not written\n",
1415 (pgcnt_t)zpages, (pgcnt_t)zpages * 100 /
1416 dumphdr.dump_npages);
1418 if (saved != dumphdr.dump_npages)
1419 logprint(SC_SL_WARN, "bad data after page %ld", saved);
1422 * Write out the modified dump headers.
1424 Pwrite(corefd, &corehdr, sizeof (corehdr), 0);
1425 if (!filemode)
1426 Pwrite(dumpfd, &dumphdr, sizeof (dumphdr), endoff);
1428 (void) close(corefd);
1432 * When the system panics, the kernel saves all undelivered messages (messages
1433 * that never made it out to syslogd(1M)) in the dump. At a mimimum, the
1434 * panic message itself will always fall into this category. Upon reboot,
1435 * the syslog startup script runs savecore -m to recover these messages.
1437 * To do this, we read the unsent messages from the dump and send them to
1438 * /dev/conslog on priority band 1. This has the effect of prepending them
1439 * to any already-accumulated messages in the console backlog, thus preserving
1440 * temporal ordering across the reboot.
1442 * Note: since savecore -m is used *only* for this purpose, it does *not*
1443 * attempt to save the crash dump. The dump will be saved later, after
1444 * syslogd(1M) starts, by the savecore startup script.
1446 static int
1447 message_save(void)
1449 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE);
1450 offset_t ldoff;
1451 log_dump_t ld;
1452 log_ctl_t lc;
1453 struct strbuf ctl, dat;
1454 int logfd;
1456 logfd = Open("/dev/conslog", O_WRONLY, 0644);
1457 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1458 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1460 ctl.buf = (void *)&lc;
1461 ctl.len = sizeof (log_ctl_t);
1463 dat.buf = Zalloc(DUMP_LOGSIZE);
1465 for (;;) {
1466 ldoff = dumpoff;
1468 Pread(dumpfd, &ld, sizeof (log_dump_t), dumpoff);
1469 dumpoff += sizeof (log_dump_t);
1470 dat.len = ld.ld_msgsize;
1472 if (ld.ld_magic == 0)
1473 break;
1475 if (ld.ld_magic != LOG_MAGIC)
1476 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1477 "bad magic %x", ld.ld_magic);
1479 if (dat.len >= DUMP_LOGSIZE)
1480 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_ERR,
1481 "bad size %d", ld.ld_msgsize);
1483 Pread(dumpfd, ctl.buf, ctl.len, dumpoff);
1484 dumpoff += ctl.len;
1486 if (ld.ld_csum != checksum32(ctl.buf, ctl.len))
1487 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1488 "bad log_ctl checksum");
1490 lc.flags |= SL_LOGONLY;
1492 Pread(dumpfd, dat.buf, dat.len, dumpoff);
1493 dumpoff += dat.len;
1495 if (ld.ld_msum != checksum32(dat.buf, dat.len))
1496 logprint(SC_SL_ERR | SC_IF_VERBOSE | SC_EXIT_OK,
1497 "bad message checksum");
1499 if (putpmsg(logfd, &ctl, &dat, 1, MSG_BAND) == -1)
1500 logprint(SC_SL_ERR | SC_EXIT_ERR, "putpmsg: %s",
1501 strerror(errno));
1503 ld.ld_magic = 0; /* clear magic so we never save twice */
1504 Pwrite(dumpfd, &ld, sizeof (log_dump_t), ldoff);
1506 return (0);
/*
 * Extract the numeric suffix N from a "vmdump.N" file name.
 *
 * f: path of the dump file given with -f.
 *
 * The final path component is examined first.  If it does not begin with
 * "vmdump", the first occurrence of "vmdump" anywhere in the path is used
 * instead.  Returns N, or -1 when no "vmdump.N" pattern is found.
 */
static long
getbounds(const char *f)
{
	long b = -1;
	const char *base = strrchr(f, '/');
	const char *p;

	/*
	 * Step past the '/' before comparing; comparing at the slash itself
	 * can never match "vmdump".
	 */
	base = (base != NULL) ? base + 1 : f;

	if (strncmp(base, "vmdump", 6) == 0)
		p = base;
	else
		p = strstr(f, "vmdump");

	/* b keeps its -1 default when the pattern does not match. */
	(void) sscanf(p != NULL ? p : f, "vmdump.%ld", &b);

	return (b);
}

1526 static void
1527 stack_retrieve(char *stack)
1529 summary_dump_t sd;
1530 offset_t dumpoff = -(DUMP_OFFSET + DUMP_LOGSIZE +
1531 DUMP_ERPTSIZE);
1532 dumpoff -= DUMP_SUMMARYSIZE;
1534 dumpfd = Open(dumpfile, O_RDWR | O_DSYNC, 0644);
1535 dumpoff = llseek(dumpfd, dumpoff, SEEK_END) & -DUMP_OFFSET;
1537 Pread(dumpfd, &sd, sizeof (summary_dump_t), dumpoff);
1538 dumpoff += sizeof (summary_dump_t);
1540 if (sd.sd_magic == 0) {
1541 *stack = '\0';
1542 return;
1545 if (sd.sd_magic != SUMMARY_MAGIC) {
1546 *stack = '\0';
1547 logprint(SC_SL_NONE | SC_IF_VERBOSE,
1548 "bad summary magic %x", sd.sd_magic);
1549 return;
1551 Pread(dumpfd, stack, STACK_BUF_SIZE, dumpoff);
1552 if (sd.sd_ssum != checksum32(stack, STACK_BUF_SIZE))
1553 logprint(SC_SL_NONE | SC_IF_VERBOSE, "bad stack checksum");
1556 static void
1557 raise_event(enum sc_event_type evidx, char *warn_string)
1559 uint32_t pl = sc_event[evidx].sce_payload;
1560 char panic_stack[STACK_BUF_SIZE];
1561 nvlist_t *attr = NULL;
1562 char uuidbuf[36 + 1];
1563 int err = 0;
1565 if (nvlist_alloc(&attr, NV_UNIQUE_NAME, 0) != 0)
1566 goto publish; /* try to send payload-free event */
1568 if (pl & SC_PAYLOAD_SAVEDIR && savedir != NULL)
1569 err |= nvlist_add_string(attr, "dumpdir", savedir);
1571 if (pl & SC_PAYLOAD_INSTANCE && bounds != -1)
1572 err |= nvlist_add_int64(attr, "instance", bounds);
1574 if (pl & SC_PAYLOAD_ISCOMPRESSED) {
1575 err |= nvlist_add_boolean_value(attr, "compressed",
1576 csave ? B_TRUE : B_FALSE);
1579 if (pl & SC_PAYLOAD_DUMPADM_EN) {
1580 char *disabled = defread("DUMPADM_ENABLE=no");
1582 err |= nvlist_add_boolean_value(attr, "savecore-enabled",
1583 disabled ? B_FALSE : B_TRUE);
1586 if (pl & SC_PAYLOAD_IMAGEUUID) {
1587 (void) strncpy(uuidbuf, corehdr.dump_uuid, 36);
1588 uuidbuf[36] = '\0';
1589 err |= nvlist_add_string(attr, "os-instance-uuid", uuidbuf);
1592 if (pl & SC_PAYLOAD_CRASHTIME) {
1593 err |= nvlist_add_int64(attr, "crashtime",
1594 (int64_t)corehdr.dump_crashtime);
1597 if (pl & SC_PAYLOAD_PANICSTR && corehdr.dump_panicstring[0] != '\0') {
1598 err |= nvlist_add_string(attr, "panicstr",
1599 corehdr.dump_panicstring);
1602 if (pl & SC_PAYLOAD_PANICSTACK) {
1603 stack_retrieve(panic_stack);
1605 if (panic_stack[0] != '\0') {
1607 * The summary page may not be present if the dump
1608 * was previously recorded compressed.
1610 (void) nvlist_add_string(attr, "panicstack",
1611 panic_stack);
1615 /* add warning string if this is an ireport for dump failure */
1616 if (pl & SC_PAYLOAD_FAILREASON && warn_string != NULL)
1617 (void) nvlist_add_string(attr, "failure-reason", warn_string);
1619 if (pl & SC_PAYLOAD_DUMPCOMPLETE)
1620 err |= nvlist_add_boolean_value(attr, "dump-incomplete",
1621 dump_incomplete ? B_TRUE : B_FALSE);
1623 if (pl & SC_PAYLOAD_FM_PANIC) {
1624 err |= nvlist_add_boolean_value(attr, "fm-panic",
1625 fm_panic ? B_TRUE : B_FALSE);
1628 if (pl & SC_PAYLOAD_JUSTCHECKING) {
1629 err |= nvlist_add_boolean_value(attr, "will-attempt-savecore",
1630 cflag ? B_FALSE : B_TRUE);
1633 if (err)
1634 logprint(SC_SL_WARN, "Errors while constructing '%s' "
1635 "event payload; will try to publish anyway.");
1636 publish:
1637 if (fmev_rspublish_nvl(FMEV_RULESET_ON_SUNOS,
1638 "panic", sc_event[evidx].sce_subclass, FMEV_HIPRI,
1639 attr) != FMEV_SUCCESS) {
1640 logprint(SC_SL_ERR, "failed to publish '%s' event: %s",
1641 sc_event[evidx].sce_subclass, fmev_strerror(fmev_errno));
1642 nvlist_free(attr);
1649 main(int argc, char *argv[])
1651 int i, c, bfd;
1652 Stat_t st;
1653 struct rlimit rl;
1654 long filebounds = -1;
1655 char namelist[30], corefile[30], boundstr[30];
1656 dumpfile = NULL;
1658 startts = gethrtime();
1660 (void) getrlimit(RLIMIT_NOFILE, &rl);
1661 rl.rlim_cur = rl.rlim_max;
1662 (void) setrlimit(RLIMIT_NOFILE, &rl);
1664 openlog(progname, LOG_ODELAY, LOG_AUTH);
1666 (void) defopen("/etc/dumpadm.conf");
1667 savedir = defread("DUMPADM_SAVDIR=");
1668 if (savedir != NULL)
1669 savedir = strdup(savedir);
1671 while ((c = getopt(argc, argv, "Lvcdmf:")) != EOF) {
1672 switch (c) {
1673 case 'L':
1674 livedump++;
1675 break;
1676 case 'v':
1677 verbose++;
1678 break;
1679 case 'c':
1680 cflag++;
1681 break;
1682 case 'd':
1683 disregard_valid_flag++;
1684 break;
1685 case 'm':
1686 mflag++;
1687 break;
1688 case 'f':
1689 dumpfile = optarg;
1690 filebounds = getbounds(dumpfile);
1691 break;
1692 case '?':
1693 usage();
1698 * If doing something other than extracting an existing dump (i.e.
1699 * dumpfile has been provided as an option), the user must be root.
1701 if (geteuid() != 0 && dumpfile == NULL) {
1702 (void) fprintf(stderr, "%s: %s %s\n", progname,
1703 gettext("you must be root to use"), progname);
1704 exit(1);
1707 interactive = isatty(STDOUT_FILENO);
1709 if (cflag && livedump)
1710 usage();
1712 if (dumpfile == NULL || livedump)
1713 dumpfd = Open("/dev/dump", O_RDONLY, 0444);
1715 if (dumpfile == NULL) {
1716 dumpfile = Zalloc(MAXPATHLEN);
1717 if (ioctl(dumpfd, DIOCGETDEV, dumpfile) == -1) {
1718 have_dumpfile = B_FALSE;
1719 logprint(SC_SL_NONE | SC_IF_ISATTY | SC_EXIT_ERR,
1720 "no dump device configured");
1724 if (mflag)
1725 return (message_save());
1727 if (optind == argc - 1)
1728 savedir = argv[optind];
1730 if (savedir == NULL || optind < argc - 1)
1731 usage();
1733 if (livedump && ioctl(dumpfd, DIOCDUMP, NULL) == -1)
1734 logprint(SC_SL_NONE | SC_EXIT_ERR,
1735 "dedicated dump device required");
1737 (void) close(dumpfd);
1738 dumpfd = -1;
1740 Stat(dumpfile, &st);
1742 filemode = S_ISREG(st.st_mode);
1744 if (!filemode && defread("DUMPADM_CSAVE=off") == NULL)
1745 csave = 1;
1747 read_dumphdr();
1750 * We want this message to go to the log file, but not the console.
1751 * There's no good way to do that with the existing syslog facility.
1752 * We could extend it to handle this, but there doesn't seem to be
1753 * a general need for it, so we isolate the complexity here instead.
1755 if (dumphdr.dump_panicstring[0] != '\0') {
1756 int logfd = Open("/dev/conslog", O_WRONLY, 0644);
1757 log_ctl_t lc;
1758 struct strbuf ctl, dat;
1759 char msg[DUMP_PANICSIZE + 100];
1760 char fmt[] = "reboot after panic: %s";
1761 uint32_t msgid;
1763 STRLOG_MAKE_MSGID(fmt, msgid);
1765 /* LINTED: E_SEC_SPRINTF_UNBOUNDED_COPY */
1766 (void) sprintf(msg, "%s: [ID %u FACILITY_AND_PRIORITY] ",
1767 progname, msgid);
1768 /* LINTED: E_SEC_PRINTF_VAR_FMT */
1769 (void) sprintf(msg + strlen(msg), fmt,
1770 dumphdr.dump_panicstring);
1772 lc.pri = LOG_AUTH | LOG_ERR;
1773 lc.flags = SL_CONSOLE | SL_LOGONLY;
1774 lc.level = 0;
1776 ctl.buf = (void *)&lc;
1777 ctl.len = sizeof (log_ctl_t);
1779 dat.buf = (void *)msg;
1780 dat.len = strlen(msg) + 1;
1782 (void) putmsg(logfd, &ctl, &dat, 0);
1783 (void) close(logfd);
1786 if ((dumphdr.dump_flags & DF_COMPLETE) == 0) {
1787 logprint(SC_SL_WARN, "incomplete dump on dump device");
1788 dump_incomplete = B_TRUE;
1791 if (dumphdr.dump_fm_panic)
1792 fm_panic = B_TRUE;
1795 * We have a valid dump on a dump device and know as much about
1796 * it as we're going to at this stage. Raise an event for
1797 * logging and so that FMA can open a case for this panic.
1798 * Avoid this step for FMA-initiated panics - FMA will replay
1799 * ereports off the dump device independently of savecore and
1800 * will make a diagnosis, so we don't want to open two cases
1801 * for the same event. Also avoid raising an event for a
1802 * livedump, or when we inflating a compressed dump.
1804 if (!fm_panic && !livedump && !filemode)
1805 raise_event(SC_EVENT_DUMP_PENDING, NULL);
1807 logprint(SC_SL_WARN, "System dump time: %s",
1808 ctime(&dumphdr.dump_crashtime));
1811 * Option -c is designed for use from svc-dumpadm where we know
1812 * that dumpadm -n is in effect but run savecore -c just to
1813 * get the above dump_pending_on_device event raised. If it is run
1814 * interactively then just print further panic details.
1816 if (cflag) {
1817 char *disabled = defread("DUMPADM_ENABLE=no");
1818 int lvl = interactive ? SC_SL_WARN : SC_SL_ERR;
1819 int ec = fm_panic ? SC_EXIT_FM : SC_EXIT_PEND;
1821 logprint(lvl | ec,
1822 "Panic crashdump pending on dump device%s "
1823 "run savecore(1M) manually to extract. "
1824 "Image UUID %s%s.",
1825 disabled ? " but dumpadm -n in effect;" : ";",
1826 corehdr.dump_uuid,
1827 fm_panic ? "(fault-management initiated)" : "");
1828 /*NOTREACHED*/
1831 if (chdir(savedir) == -1)
1832 logprint(SC_SL_ERR | SC_EXIT_ERR, "chdir(\"%s\"): %s",
1833 savedir, strerror(errno));
1835 check_space(csave);
1837 if (filebounds < 0)
1838 bounds = read_number_from_file("bounds", 0);
1839 else
1840 bounds = filebounds;
1842 if (csave) {
1843 size_t metrics_size = datahdr.dump_metrics;
1845 (void) sprintf(corefile, "vmdump.%ld", bounds);
1847 datahdr.dump_metrics = 0;
1849 logprint(SC_SL_ERR,
1850 "Saving compressed system crash dump in %s/%s",
1851 savedir, corefile);
1853 copy_crashfile(corefile);
1856 * Raise a fault management event that indicates the system
1857 * has panicked. We know a reasonable amount about the
1858 * condition at this time, but the dump is still compressed.
1860 if (!livedump && !fm_panic)
1861 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1863 if (metrics_size > 0) {
1864 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1865 FILE *mfile = fopen(METRICSFILE, "a");
1866 char *metrics = Zalloc(metrics_size + 1);
1868 Pread(dumpfd, metrics, metrics_size, endoff +
1869 sizeof (dumphdr) + sizeof (datahdr));
1871 if (sec < 1)
1872 sec = 1;
1874 if (mfile == NULL) {
1875 logprint(SC_SL_WARN,
1876 "Can't create %s:\n%s",
1877 METRICSFILE, metrics);
1878 } else {
1879 (void) fprintf(mfile, "[[[[,,,");
1880 for (i = 0; i < argc; i++)
1881 (void) fprintf(mfile, "%s ", argv[i]);
1882 (void) fprintf(mfile, "\n");
1883 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1884 dumphdr.dump_utsname.sysname,
1885 dumphdr.dump_utsname.nodename,
1886 dumphdr.dump_utsname.release,
1887 dumphdr.dump_utsname.version,
1888 dumphdr.dump_utsname.machine);
1889 (void) fprintf(mfile, ",,,%s dump time %s\n",
1890 dumphdr.dump_flags & DF_LIVE ? "Live" :
1891 "Crash", ctime(&dumphdr.dump_crashtime));
1892 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1893 corefile);
1894 (void) fprintf(mfile, "Metrics:\n%s\n",
1895 metrics);
1896 (void) fprintf(mfile, "Copy pages,%ld\n",
1897 dumphdr. dump_npages);
1898 (void) fprintf(mfile, "Copy time,%d\n", sec);
1899 (void) fprintf(mfile, "Copy pages/sec,%ld\n",
1900 dumphdr.dump_npages / sec);
1901 (void) fprintf(mfile, "]]]]\n");
1902 (void) fclose(mfile);
1904 free(metrics);
1907 logprint(SC_SL_ERR,
1908 "Decompress the crash dump with "
1909 "\n'savecore -vf %s/%s'",
1910 savedir, corefile);
1912 } else {
1913 (void) sprintf(namelist, "unix.%ld", bounds);
1914 (void) sprintf(corefile, "vmcore.%ld", bounds);
1916 if (interactive && filebounds >= 0 && access(corefile, F_OK)
1917 == 0)
1918 logprint(SC_SL_NONE | SC_EXIT_ERR,
1919 "%s already exists: remove with "
1920 "'rm -f %s/{unix,vmcore}.%ld'",
1921 corefile, savedir, bounds);
1923 logprint(SC_SL_ERR,
1924 "saving system crash dump in %s/{unix,vmcore}.%ld",
1925 savedir, bounds);
1927 build_corefile(namelist, corefile);
1929 if (!livedump && !filemode && !fm_panic)
1930 raise_event(SC_EVENT_DUMP_AVAILABLE, NULL);
1932 if (access(METRICSFILE, F_OK) == 0) {
1933 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1934 FILE *mfile = fopen(METRICSFILE, "a");
1936 if (sec < 1)
1937 sec = 1;
1939 if (mfile == NULL) {
1940 logprint(SC_SL_WARN,
1941 "Can't create %s: %s",
1942 METRICSFILE, strerror(errno));
1943 } else {
1944 (void) fprintf(mfile, "[[[[,,,");
1945 for (i = 0; i < argc; i++)
1946 (void) fprintf(mfile, "%s ", argv[i]);
1947 (void) fprintf(mfile, "\n");
1948 (void) fprintf(mfile, ",,,%s/%s\n", savedir,
1949 corefile);
1950 (void) fprintf(mfile, ",,,%s %s %s %s %s\n",
1951 dumphdr.dump_utsname.sysname,
1952 dumphdr.dump_utsname.nodename,
1953 dumphdr.dump_utsname.release,
1954 dumphdr.dump_utsname.version,
1955 dumphdr.dump_utsname.machine);
1956 (void) fprintf(mfile,
1957 "Uncompress pages,%"PRIu64"\n", saved);
1958 (void) fprintf(mfile, "Uncompress time,%d\n",
1959 sec);
1960 (void) fprintf(mfile, "Uncompress pages/sec,%"
1961 PRIu64"\n", saved / sec);
1962 (void) fprintf(mfile, "]]]]\n");
1963 (void) fclose(mfile);
1968 if (filebounds < 0) {
1969 (void) sprintf(boundstr, "%ld\n", bounds + 1);
1970 bfd = Open("bounds", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1971 Pwrite(bfd, boundstr, strlen(boundstr), 0);
1972 (void) close(bfd);
1975 if (verbose) {
1976 int sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
1978 (void) printf("%d:%02d dump %s is done\n",
1979 sec / 60, sec % 60,
1980 csave ? "copy" : "decompress");
1983 if (verbose > 1 && hist != NULL) {
1984 int i, nw;
1986 for (i = 1, nw = 0; i <= BTOP(coreblksize); ++i)
1987 nw += hist[i] * i;
1988 (void) printf("pages count %%\n");
1989 for (i = 0; i <= BTOP(coreblksize); ++i) {
1990 if (hist[i] == 0)
1991 continue;
1992 (void) printf("%3d %5u %6.2f\n",
1993 i, hist[i], 100.0 * hist[i] * i / nw);
1997 (void) close(dumpfd);
1998 dumpfd = -1;
2000 return (0);