4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Fault Management Architecture (FMA) Resource and Protocol Support
29 * The routines contained herein provide services to support kernel subsystems
30 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
32 * Name-Value Pair Lists
34 * The embodiment of an FMA protocol element (event, fmri or authority) is a
35 * name-value pair list (nvlist_t). FMA-specific nvlist constructor and
36 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
37 * to create an nvpair list using custom allocators. Callers may choose to
38 * allocate either from the kernel memory allocator, or from a preallocated
39 * buffer, useful in constrained contexts like high-level interrupt routines.
41 * Protocol Event and FMRI Construction
43 * Convenience routines are provided to construct nvlist events according to
44 * the FMA Event Protocol and Naming Schema specification for ereports and
45 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
49 * Routines to generate ENA formats 0, 1 and 2 are available as well as
50 * routines to increment formats 1 and 2. Individual fields within the
51 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
52 * fm_ena_format_get() and fm_ena_gen_get().
55 #include <sys/types.h>
58 #include <sys/kernel.h>
59 #include <sys/systm.h>
60 #include <sys/sysevent.h>
61 #include <sys/nvpair.h>
62 #include <sys/cmn_err.h>
63 #include <sys/cpuvar.h>
64 #include <sys/sysmacros.h>
65 #include <sys/systm.h>
67 #include <sys/atomic.h>
68 #include <sys/systeminfo.h>
69 #include <sys/sysevent/eventdefs.h>
70 #include <sys/fm/util.h>
71 #include <sys/fm/protocol.h>
74 * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These
75 * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
77 static const char *fm_url
= "http://www.sun.com/msg";
78 static const char *fm_msgid
= "SUNOS-8000-0G";
79 static char *volatile fm_panicstr
= NULL
;
81 errorq_t
*ereport_errorq
;
83 static uint_t ereport_chanlen
= ERPT_EVCH_MAX
;
84 static evchan_t
*ereport_chan
= NULL
;
85 static ulong_t ereport_qlen
= 0;
86 static size_t ereport_size
= 0;
87 static int ereport_cols
= 80;
90 * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of
91 * output so they aren't split across console lines, and return the end column.
95 fm_printf(int depth
, int c
, int cols
, const char *format
, ...)
101 va_start(ap
, format
);
102 width
= vsnprintf(&c1
, sizeof (c1
), format
, ap
);
105 if (c
+ width
>= cols
) {
108 if (format
[0] != ' ' && depth
> 0) {
114 va_start(ap
, format
);
118 return ((c
+ width
) % cols
);
122 * Recursively print a nvlist in the specified column width and return the
123 * column we end up in. This function is first called by fm_nvprint(),
124 * below. We generically format the entire nvpair using hexadecimal
125 * integers and strings, and elide any integer arrays. Arrays are basically
126 * used for cache dumps right now, so we suppress them so as not to overwhelm
127 * the amount of console output we produce at panic time. This can be further
128 * enhanced as FMA technology grows based upon the needs of consumers. All
129 * FMA telemetry is logged using the dump device transport, so the console
130 * output serves only as a fallback in case this procedure is unsuccessful.
133 fm_nvprintr(nvlist_t
*nvl
, int d
, int c
, int cols
)
137 for (nvp
= nvlist_next_nvpair(nvl
, NULL
);
138 nvp
!= NULL
; nvp
= nvlist_next_nvpair(nvl
, nvp
)) {
140 data_type_t type
= nvpair_type(nvp
);
141 const char *name
= nvpair_name(nvp
);
151 if (strcmp(name
, FM_CLASS
) == 0)
152 continue; /* already printed by caller */
154 c
= fm_printf(d
, c
, cols
, " %s=", name
);
157 case DATA_TYPE_BOOLEAN
:
158 c
= fm_printf(d
+ 1, c
, cols
, " 1");
161 case DATA_TYPE_BOOLEAN_VALUE
:
162 (void) nvpair_value_boolean_value(nvp
, &b
);
163 c
= fm_printf(d
+ 1, c
, cols
, b
? "1" : "0");
167 (void) nvpair_value_byte(nvp
, &i8
);
168 c
= fm_printf(d
+ 1, c
, cols
, "%x", i8
);
172 (void) nvpair_value_int8(nvp
, (void *)&i8
);
173 c
= fm_printf(d
+ 1, c
, cols
, "%x", i8
);
176 case DATA_TYPE_UINT8
:
177 (void) nvpair_value_uint8(nvp
, &i8
);
178 c
= fm_printf(d
+ 1, c
, cols
, "%x", i8
);
181 case DATA_TYPE_INT16
:
182 (void) nvpair_value_int16(nvp
, (void *)&i16
);
183 c
= fm_printf(d
+ 1, c
, cols
, "%x", i16
);
186 case DATA_TYPE_UINT16
:
187 (void) nvpair_value_uint16(nvp
, &i16
);
188 c
= fm_printf(d
+ 1, c
, cols
, "%x", i16
);
191 case DATA_TYPE_INT32
:
192 (void) nvpair_value_int32(nvp
, (void *)&i32
);
193 c
= fm_printf(d
+ 1, c
, cols
, "%x", i32
);
196 case DATA_TYPE_UINT32
:
197 (void) nvpair_value_uint32(nvp
, &i32
);
198 c
= fm_printf(d
+ 1, c
, cols
, "%x", i32
);
201 case DATA_TYPE_INT64
:
202 (void) nvpair_value_int64(nvp
, (void *)&i64
);
203 c
= fm_printf(d
+ 1, c
, cols
, "%llx",
207 case DATA_TYPE_UINT64
:
208 (void) nvpair_value_uint64(nvp
, &i64
);
209 c
= fm_printf(d
+ 1, c
, cols
, "%llx",
213 case DATA_TYPE_HRTIME
:
214 (void) nvpair_value_hrtime(nvp
, (void *)&i64
);
215 c
= fm_printf(d
+ 1, c
, cols
, "%llx",
219 case DATA_TYPE_STRING
:
220 (void) nvpair_value_string(nvp
, &str
);
221 c
= fm_printf(d
+ 1, c
, cols
, "\"%s\"",
222 str
? str
: "<NULL>");
225 case DATA_TYPE_NVLIST
:
226 c
= fm_printf(d
+ 1, c
, cols
, "[");
227 (void) nvpair_value_nvlist(nvp
, &cnv
);
228 c
= fm_nvprintr(cnv
, d
+ 1, c
, cols
);
229 c
= fm_printf(d
+ 1, c
, cols
, " ]");
232 case DATA_TYPE_NVLIST_ARRAY
: {
236 c
= fm_printf(d
+ 1, c
, cols
, "[");
237 (void) nvpair_value_nvlist_array(nvp
, &val
, &nelem
);
238 for (i
= 0; i
< nelem
; i
++) {
239 c
= fm_nvprintr(val
[i
], d
+ 1, c
, cols
);
241 c
= fm_printf(d
+ 1, c
, cols
, " ]");
245 case DATA_TYPE_BOOLEAN_ARRAY
:
246 case DATA_TYPE_BYTE_ARRAY
:
247 case DATA_TYPE_INT8_ARRAY
:
248 case DATA_TYPE_UINT8_ARRAY
:
249 case DATA_TYPE_INT16_ARRAY
:
250 case DATA_TYPE_UINT16_ARRAY
:
251 case DATA_TYPE_INT32_ARRAY
:
252 case DATA_TYPE_UINT32_ARRAY
:
253 case DATA_TYPE_INT64_ARRAY
:
254 case DATA_TYPE_UINT64_ARRAY
:
255 case DATA_TYPE_STRING_ARRAY
:
256 c
= fm_printf(d
+ 1, c
, cols
, "[...]");
258 case DATA_TYPE_UNKNOWN
:
259 c
= fm_printf(d
+ 1, c
, cols
, "<unknown>");
268 fm_nvprint(nvlist_t
*nvl
)
275 if (nvlist_lookup_string(nvl
, FM_CLASS
, &class) == 0)
276 c
= fm_printf(0, c
, ereport_cols
, "%s", class);
278 if (fm_nvprintr(nvl
, 0, c
, ereport_cols
) != 0)
285 * Wrapper for panic() that first produces an FMA-style message for admins.
286 * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
287 * is the one exception to that rule and the only error that gets messaged.
288 * This function is intended for use by subsystems that have detected a fatal
289 * error and enqueued appropriate ereports and wish to then force a panic.
293 fm_panic(const char *format
, ...)
297 (void) atomic_cas_ptr((void *)&fm_panicstr
, NULL
, (void *)format
);
298 va_start(ap
, format
);
299 vcmn_err(CE_PANIC
, format
, ap
);
304 * Print any appropriate FMA banner message before the panic message. This
305 * function is called by panicsys() and prints the message for fm_panic().
306 * We print the message here so that it comes after the system is quiesced.
307 * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix).
308 * The rest of the message is for the console only and not needed in the log,
309 * so it is printed using printf(). We break it up into multiple
310 * chunks so as to avoid overflowing any small legacy prom_printf() buffers.
319 return; /* panic was not initiated by fm_panic(); do nothing */
322 now
= hardclock_ticks
;
324 cmn_err(CE_NOTE
, "!SUNW-MSG-ID: %s, "
325 "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid
);
328 "\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
329 "EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
330 fm_msgid
, tod
.tv_sec
, tod
.tv_nsec
, (u_longlong_t
)now
);
333 "PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
334 "SOURCE: %s, REV: %s\n",
335 machine
, hostname
, "NetBSD",
339 "DESC: Errors have been detected that require a reboot to ensure system\n"
340 "integrity. See %s/%s for more information.\n",
344 "AUTO-RESPONSE: NetBSD will not attempt to save and diagnose the error telemetry\n"
345 "IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
346 "REC-ACTION: Save the error summary below\n");
352 * Post an error report (ereport) to the sysevent error channel. The error
353 * channel must be established with a prior call to sysevent_evc_create()
354 * before publication may occur.
357 fm_ereport_post(nvlist_t
*ereport
, int evc_flag
)
360 evchan_t
*error_chan
;
362 (void) nvlist_size(ereport
, &nvl_size
, NV_ENCODE_NATIVE
);
363 if (nvl_size
> ERPT_DATA_SZ
|| nvl_size
== 0) {
364 printf("fm_ereport_post: dropped report\n");
374 * Wrappers for FM nvlist allocators
378 i_fm_alloc(nv_alloc_t
*nva
, size_t size
)
380 return (kmem_zalloc(size
, KM_SLEEP
));
385 i_fm_free(nv_alloc_t
*nva
, void *buf
, size_t size
)
387 kmem_free(buf
, size
);
390 const nv_alloc_ops_t fm_mem_alloc_ops
= {
399 * Create and initialize a new nv_alloc_t for a fixed buffer, buf. A pointer
400 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
401 * is returned to indicate that the nv_alloc structure could not be created.
404 fm_nva_xcreate(char *buf
, size_t bufsz
)
406 nv_alloc_t
*nvhdl
= kmem_zalloc(sizeof (nv_alloc_t
), KM_SLEEP
);
408 if (bufsz
== 0 || nv_alloc_init(nvhdl
, nv_fixed_ops
, buf
, bufsz
) != 0) {
409 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
417 * Destroy a previously allocated nv_alloc structure. The fixed buffer
418 * associated with nva must be freed by the caller.
421 fm_nva_xdestroy(nv_alloc_t
*nva
)
424 kmem_free(nva
, sizeof (nv_alloc_t
));
428 * Create a new nv list. A pointer to a new nv list structure is returned
429 * upon success or NULL is returned to indicate that the structure could
430 * not be created. The newly created nv list is created and managed by the
431 * operations installed in nva. If nva is NULL, the default FMA nva
432 * operations are installed and used.
434 * When called from the kernel and nva == NULL, this function must be called
435 * from passive kernel context with no locks held that can prevent a
436 * sleeping memory allocation from occurring. Otherwise, this function may
437 * be called from other kernel contexts as long as a valid nva created via
438 * fm_nva_xcreate() is supplied.
441 fm_nvlist_create(nv_alloc_t
*nva
)
448 nvhdl
= kmem_zalloc(sizeof (nv_alloc_t
), KM_SLEEP
);
450 if (nv_alloc_init(nvhdl
, &fm_mem_alloc_ops
, NULL
, 0) != 0) {
451 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
459 if (nvlist_xalloc(&nvl
, NV_UNIQUE_NAME
, nvhdl
) != 0) {
461 kmem_free(nvhdl
, sizeof (nv_alloc_t
));
462 nv_alloc_fini(nvhdl
);
471 * Destroy a previously allocated nvlist structure. flag indicates whether
472 * or not the associated nva structure should be freed (FM_NVA_FREE) or
473 * retained (FM_NVA_RETAIN). Retaining the nv alloc structure allows
474 * it to be re-used for future nvlist creation operations.
477 fm_nvlist_destroy(nvlist_t
*nvl
, int flag
)
479 nv_alloc_t
*nva
= nvlist_lookup_nv_alloc(nvl
);
484 if (flag
== FM_NVA_FREE
)
485 fm_nva_xdestroy(nva
);
490 i_fm_payload_set(nvlist_t
*payload
, const char *name
, va_list ap
)
495 while (ret
== 0 && name
!= NULL
) {
496 type
= va_arg(ap
, data_type_t
);
499 ret
= nvlist_add_byte(payload
, name
,
502 case DATA_TYPE_BYTE_ARRAY
:
503 nelem
= va_arg(ap
, int);
504 ret
= nvlist_add_byte_array(payload
, name
,
505 va_arg(ap
, uchar_t
*), nelem
);
507 case DATA_TYPE_BOOLEAN_VALUE
:
508 ret
= nvlist_add_boolean_value(payload
, name
,
509 va_arg(ap
, boolean_t
));
511 case DATA_TYPE_BOOLEAN_ARRAY
:
512 nelem
= va_arg(ap
, int);
513 ret
= nvlist_add_boolean_array(payload
, name
,
514 va_arg(ap
, boolean_t
*), nelem
);
517 ret
= nvlist_add_int8(payload
, name
,
520 case DATA_TYPE_INT8_ARRAY
:
521 nelem
= va_arg(ap
, int);
522 ret
= nvlist_add_int8_array(payload
, name
,
523 va_arg(ap
, int8_t *), nelem
);
525 case DATA_TYPE_UINT8
:
526 ret
= nvlist_add_uint8(payload
, name
,
529 case DATA_TYPE_UINT8_ARRAY
:
530 nelem
= va_arg(ap
, int);
531 ret
= nvlist_add_uint8_array(payload
, name
,
532 va_arg(ap
, uint8_t *), nelem
);
534 case DATA_TYPE_INT16
:
535 ret
= nvlist_add_int16(payload
, name
,
538 case DATA_TYPE_INT16_ARRAY
:
539 nelem
= va_arg(ap
, int);
540 ret
= nvlist_add_int16_array(payload
, name
,
541 va_arg(ap
, int16_t *), nelem
);
543 case DATA_TYPE_UINT16
:
544 ret
= nvlist_add_uint16(payload
, name
,
547 case DATA_TYPE_UINT16_ARRAY
:
548 nelem
= va_arg(ap
, int);
549 ret
= nvlist_add_uint16_array(payload
, name
,
550 va_arg(ap
, uint16_t *), nelem
);
552 case DATA_TYPE_INT32
:
553 ret
= nvlist_add_int32(payload
, name
,
554 va_arg(ap
, int32_t));
556 case DATA_TYPE_INT32_ARRAY
:
557 nelem
= va_arg(ap
, int);
558 ret
= nvlist_add_int32_array(payload
, name
,
559 va_arg(ap
, int32_t *), nelem
);
561 case DATA_TYPE_UINT32
:
562 ret
= nvlist_add_uint32(payload
, name
,
563 va_arg(ap
, uint32_t));
565 case DATA_TYPE_UINT32_ARRAY
:
566 nelem
= va_arg(ap
, int);
567 ret
= nvlist_add_uint32_array(payload
, name
,
568 va_arg(ap
, uint32_t *), nelem
);
570 case DATA_TYPE_INT64
:
571 ret
= nvlist_add_int64(payload
, name
,
572 va_arg(ap
, int64_t));
574 case DATA_TYPE_INT64_ARRAY
:
575 nelem
= va_arg(ap
, int);
576 ret
= nvlist_add_int64_array(payload
, name
,
577 va_arg(ap
, int64_t *), nelem
);
579 case DATA_TYPE_UINT64
:
580 ret
= nvlist_add_uint64(payload
, name
,
581 va_arg(ap
, uint64_t));
583 case DATA_TYPE_UINT64_ARRAY
:
584 nelem
= va_arg(ap
, int);
585 ret
= nvlist_add_uint64_array(payload
, name
,
586 va_arg(ap
, uint64_t *), nelem
);
588 case DATA_TYPE_STRING
:
589 ret
= nvlist_add_string(payload
, name
,
592 case DATA_TYPE_STRING_ARRAY
:
593 nelem
= va_arg(ap
, int);
594 ret
= nvlist_add_string_array(payload
, name
,
595 va_arg(ap
, char **), nelem
);
597 case DATA_TYPE_NVLIST
:
598 ret
= nvlist_add_nvlist(payload
, name
,
599 va_arg(ap
, nvlist_t
*));
601 case DATA_TYPE_NVLIST_ARRAY
:
602 nelem
= va_arg(ap
, int);
603 ret
= nvlist_add_nvlist_array(payload
, name
,
604 va_arg(ap
, nvlist_t
**), nelem
);
610 name
= va_arg(ap
, char *);
616 fm_payload_set(nvlist_t
*payload
, ...)
622 va_start(ap
, payload
);
623 name
= va_arg(ap
, char *);
624 ret
= i_fm_payload_set(payload
, name
, ap
);
628 printf("fm_payload_set: failed\n");
632 * Set-up and validate the members of an ereport event according to:
634 * Member name Type Value
635 * ====================================================
636 * class string ereport
639 * detector nvlist_t <detector>
640 * ereport-payload nvlist_t <var args>
644 fm_ereport_set(nvlist_t
*ereport
, int version
, const char *erpt_class
,
645 uint64_t ena
, const nvlist_t
*detector
, ...)
647 char ereport_class
[FM_MAX_CLASS
];
652 if (version
!= FM_EREPORT_VERS0
) {
653 printf("fm_payload_set: bad version\n");
657 (void) snprintf(ereport_class
, FM_MAX_CLASS
, "%s.%s",
658 FM_EREPORT_CLASS
, erpt_class
);
659 if (nvlist_add_string(ereport
, FM_CLASS
, ereport_class
) != 0) {
660 printf("fm_payload_set: can't add\n");
664 if (nvlist_add_uint64(ereport
, FM_EREPORT_ENA
, ena
)) {
665 printf("fm_payload_set: can't add\n");
668 if (nvlist_add_nvlist(ereport
, FM_EREPORT_DETECTOR
,
669 (nvlist_t
*)detector
) != 0) {
670 printf("fm_payload_set: can't add\n");
673 va_start(ap
, detector
);
674 name
= va_arg(ap
, const char *);
675 ret
= i_fm_payload_set(ereport
, name
, ap
);
679 printf("fm_payload_set: can't add\n");
683 fm_fmri_zfs_set(nvlist_t
*fmri
, int version
, uint64_t pool_guid
,
686 if (version
!= ZFS_SCHEME_VERSION0
) {
687 printf("fm_fmri_zfs_set: bad version\n");
691 if (nvlist_add_uint8(fmri
, FM_VERSION
, version
) != 0) {
692 printf("fm_fmri_zfs_set: can't set\n");
696 if (nvlist_add_string(fmri
, FM_FMRI_SCHEME
, FM_FMRI_SCHEME_ZFS
) != 0) {
697 printf("fm_fmri_zfs_set: can't set\n");
701 if (nvlist_add_uint64(fmri
, FM_FMRI_ZFS_POOL
, pool_guid
) != 0) {
702 printf("fm_fmri_zfs_set: can't set\n");
705 if (vdev_guid
!= 0) {
706 if (nvlist_add_uint64(fmri
, FM_FMRI_ZFS_VDEV
, vdev_guid
) != 0) {
707 printf("fm_fmri_zfs_set: can't set\n");
713 fm_ena_increment(uint64_t ena
)
717 switch (ENA_FORMAT(ena
)) {
719 new_ena
= ena
+ (1 << ENA_FMT1_GEN_SHFT
);
722 new_ena
= ena
+ (1 << ENA_FMT2_GEN_SHFT
);
732 fm_ena_generate_cpu(uint64_t timestamp
, processorid_t cpuid
, uchar_t format
)
739 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
740 ((cpuid
<< ENA_FMT1_CPUID_SHFT
) &
741 ENA_FMT1_CPUID_MASK
) |
742 ((timestamp
<< ENA_FMT1_TIME_SHFT
) &
743 ENA_FMT1_TIME_MASK
));
745 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
746 ((cpuid
<< ENA_FMT1_CPUID_SHFT
) &
747 ENA_FMT1_CPUID_MASK
) |
748 ((hardclock_ticks
<< ENA_FMT1_TIME_SHFT
) &
749 ENA_FMT1_TIME_MASK
));
753 ena
= (uint64_t)((format
& ENA_FORMAT_MASK
) |
754 ((timestamp
<< ENA_FMT2_TIME_SHFT
) & ENA_FMT2_TIME_MASK
));
764 fm_ena_generate(uint64_t timestamp
, uchar_t format
)
766 return (fm_ena_generate_cpu(timestamp
, cpu_index(curcpu()), format
));
770 fm_ena_generation_get(uint64_t ena
)
774 switch (ENA_FORMAT(ena
)) {
776 gen
= (ena
& ENA_FMT1_GEN_MASK
) >> ENA_FMT1_GEN_SHFT
;
779 gen
= (ena
& ENA_FMT2_GEN_MASK
) >> ENA_FMT2_GEN_SHFT
;
790 fm_ena_format_get(uint64_t ena
)
793 return (ENA_FORMAT(ena
));
797 fm_ena_id_get(uint64_t ena
)
801 switch (ENA_FORMAT(ena
)) {
803 id
= (ena
& ENA_FMT1_ID_MASK
) >> ENA_FMT1_ID_SHFT
;
806 id
= (ena
& ENA_FMT2_ID_MASK
) >> ENA_FMT2_ID_SHFT
;
816 fm_ena_time_get(uint64_t ena
)
820 switch (ENA_FORMAT(ena
)) {
822 time
= (ena
& ENA_FMT1_TIME_MASK
) >> ENA_FMT1_TIME_SHFT
;
825 time
= (ena
& ENA_FMT2_TIME_MASK
) >> ENA_FMT2_TIME_SHFT
;