/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators.  Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */
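
/*
 * Illustrative usage sketch (not part of the original header): constructing
 * and tearing down an ereport with these interfaces.  The class string, the
 * "delay-ns" payload member, and the pool_guid/vdev_guid variables are
 * hypothetical placeholders.
 *
 *	uint64_t ena = fm_ena_generate(0, FM_ENA_FMT1);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	nvlist_t *ereport = fm_nvlist_create(NULL);
 *
 *	fm_fmri_zfs_set(detector, ZFS_SCHEME_VERSION0, pool_guid, vdev_guid);
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "io.example.timeout", ena,
 *	    detector, "delay-ns", DATA_TYPE_UINT64, (uint64_t)1000, NULL);
 *
 *	fm_nvlist_destroy(ereport, FM_NVA_FREE);
 *	fm_nvlist_destroy(detector, FM_NVA_FREE);
 */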

#include <sys/types.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

static uint_t zfs_zevent_len_max = 512;

static uint_t zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;

/*
 * Common fault management kstats to record event generation failures
 */
struct erpt_kstat {
        kstat_named_t   erpt_dropped;           /* num erpts dropped on post */
        kstat_named_t   erpt_set_failed;        /* num erpt set failures */
        kstat_named_t   fmri_set_failed;        /* num fmri set failures */
        kstat_named_t   payload_set_failed;     /* num payload set failures */
        kstat_named_t   erpt_duplicates;        /* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
        { "erpt-dropped", KSTAT_DATA_UINT64 },
        { "erpt-set-failed", KSTAT_DATA_UINT64 },
        { "fmri-set-failed", KSTAT_DATA_UINT64 },
        { "payload-set-failed", KSTAT_DATA_UINT64 },
        { "erpt-duplicates", KSTAT_DATA_UINT64 }
};

zfs_zevent_alloc(void)
        ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

        list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
            offsetof(zfs_zevent_t, ze_node));
        list_link_init(&ev->ev_node);

zfs_zevent_free(zevent_t *ev)
        /* Run provided cleanup callback */
        ev->ev_cb(ev->ev_nvl, ev->ev_detector);

        list_destroy(&ev->ev_ze_list);
        kmem_free(ev, sizeof (zevent_t));

zfs_zevent_drain(zevent_t *ev)
        ASSERT(MUTEX_HELD(&zevent_lock));
        list_remove(&zevent_list, ev);

        /* Remove references to this event in all private file data */
        while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
                list_remove(&ev->ev_ze_list, ze);
                ze->ze_zevent = NULL;

zfs_zevent_drain_all(uint_t *count)
        mutex_enter(&zevent_lock);
        while ((ev = list_head(&zevent_list)) != NULL)
                zfs_zevent_drain(ev);

        *count = zevent_len_cur;
        mutex_exit(&zevent_lock);

/*
 * New zevents are inserted at the head.  If the maximum queue
 * length is exceeded, a zevent will be drained from the tail.
 * As part of this, any user-space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
zfs_zevent_insert(zevent_t *ev)
        ASSERT(MUTEX_HELD(&zevent_lock));
        list_insert_head(&zevent_list, ev);

        if (zevent_len_cur >= zfs_zevent_len_max)
                zfs_zevent_drain(list_tail(&zevent_list));

/*
 * Post a zevent.  The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted.  In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
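
/*
 * Illustrative sketch (not part of the original comment): a typical caller
 * builds the nvlists with fm_nvlist_create(), posts them, and lets the
 * callback reclaim them once the event is no longer needed.  The callback
 * name below is a hypothetical placeholder.
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	nvlist_t *detector = fm_nvlist_create(NULL);
 *	... fill in class, ENA, detector and payload members ...
 *	(void) zfs_zevent_post(nvl, detector, my_zevent_cleanup_cb);
 */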

zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
        tv_array[0] = tv.tv_sec;
        tv_array[1] = tv.tv_nsec;

        error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        eid = atomic_inc_64_nv(&zevent_eid);
        error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);

        if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);

        ev = zfs_zevent_alloc();
                atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);

        ev->ev_detector = detector;

        mutex_enter(&zevent_lock);
        zfs_zevent_insert(ev);
        cv_broadcast(&zevent_cv);
        mutex_exit(&zevent_lock);

zfs_zevent_track_duplicate(void)
        atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);

zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
        *ze = zfsdev_get_state(minor, ZST_ZEVENT);
                return (SET_ERROR(EBADF));

zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
        zfs_file_t *fp = zfs_file_get(fd);

        int error = zfsdev_getminor(fp, minorp);
                error = zfs_zevent_minor_to_state(*minorp, ze);

                zfs_zevent_fd_rele(fp);

zfs_zevent_fd_rele(zfs_file_t *fp)

/*
 * Get the next zevent in the stream and place a copy in 'event'.  This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'.  In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
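
/*
 * Illustrative sketch (not part of the original comment): a consumer that
 * receives ENOMEM can retry once 'size' has been raised to the minimum
 * reported by the call.  The 'ze' handle and the initial size are
 * placeholders.
 *
 *	uint64_t size = 1024, dropped = 0;
 *	nvlist_t *event = NULL;
 *	int error = zfs_zevent_next(ze, &event, &size, &dropped);
 *	if (error == ENOMEM)
 *		error = zfs_zevent_next(ze, &event, &size, &dropped);
 */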

zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
        mutex_enter(&zevent_lock);
        if (ze->ze_zevent == NULL) {
                /* New stream start at the beginning/tail */
                ev = list_tail(&zevent_list);
                /*
                 * Existing stream continues with the next element; remove
                 * ourselves from the wait queue for the previous element.
                 */
                ev = list_prev(&zevent_list, ze->ze_zevent);

        VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
        if (size > *event_size) {

                list_remove(&ze->ze_zevent->ev_ze_list, ze);

        list_insert_head(&ev->ev_ze_list, ze);
        (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
        *dropped = ze->ze_dropped;

        /* Include events dropped due to rate limiting */
        *dropped += atomic_swap_64(&ratelimit_dropped, 0);

        mutex_exit(&zevent_lock);

/*
 * Wait in an interruptible state for any new events.
 */
zfs_zevent_wait(zfs_zevent_t *ze)
        mutex_enter(&zevent_lock);

        while (error == EAGAIN) {
                if (zevent_flags & ZEVENT_SHUTDOWN) {
                        error = SET_ERROR(ESHUTDOWN);

                if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
                        error = SET_ERROR(EINTR);
                } else if (!list_is_empty(&zevent_list)) {

        mutex_exit(&zevent_lock);

/*
 * The caller may seek to a specific EID by passing that EID.  If the EID
 * is still available in the posted list of events the cursor is positioned
 * there.  Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
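
/*
 * Illustrative sketch (not part of the original comment): resuming a stream
 * from a previously saved EID and falling back to the end of the list when
 * that event has already been drained.  'saved_eid' is a placeholder.
 *
 *	if (zfs_zevent_seek(ze, saved_eid) == ENOENT)
 *		(void) zfs_zevent_seek(ze, ZEVENT_SEEK_END);
 */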

zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
        mutex_enter(&zevent_lock);

        if (eid == ZEVENT_SEEK_START) {
                        list_remove(&ze->ze_zevent->ev_ze_list, ze);

                ze->ze_zevent = NULL;

        if (eid == ZEVENT_SEEK_END) {
                        list_remove(&ze->ze_zevent->ev_ze_list, ze);

                ev = list_head(&zevent_list);
                        list_insert_head(&ev->ev_ze_list, ze);

                        ze->ze_zevent = NULL;

        for (ev = list_tail(&zevent_list); ev != NULL;
            ev = list_prev(&zevent_list, ev)) {
                if (ev->ev_eid == eid) {
                                list_remove(&ze->ze_zevent->ev_ze_list, ze);

                        list_insert_head(&ev->ev_ze_list, ze);

        mutex_exit(&zevent_lock);

zfs_zevent_init(zfs_zevent_t **zep)
        ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
        list_link_init(&ze->ze_node);

zfs_zevent_destroy(zfs_zevent_t *ze)
        mutex_enter(&zevent_lock);
                list_remove(&ze->ze_zevent->ev_ze_list, ze);
        mutex_exit(&zevent_lock);

        kmem_free(ze, sizeof (zfs_zevent_t));

/*
 * Wrappers for FM nvlist allocators
 */
i_fm_alloc(nv_alloc_t *nva, size_t size)
        return (kmem_alloc(size, KM_SLEEP));

i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
        kmem_free(buf, size);

static const nv_alloc_ops_t fm_mem_alloc_ops = {
        .nv_ao_alloc = i_fm_alloc,
        .nv_ao_free = i_fm_free,

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
fm_nva_xcreate(char *buf, size_t bufsz)
        nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

        if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
                kmem_free(nvhdl, sizeof (nv_alloc_t));

/*
 * Destroy a previously allocated nv_alloc structure.  The fixed buffer
 * associated with nva must be freed by the caller.
 */
fm_nva_xdestroy(nv_alloc_t *nva)
        kmem_free(nva, sizeof (nv_alloc_t));

/*
 * Create a new nv list.  A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created.  The newly created nv list is created and managed by the
 * operations installed in nva.  If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring.  Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
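
/*
 * Illustrative sketch (not part of the original comment): the two allocation
 * paths described above.  The buffer size below is an arbitrary example.
 *
 *	nvlist_t *nvl = fm_nvlist_create(NULL);
 *	...
 *	fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *
 *	char buf[ERPT_DATA_SZ];
 *	nv_alloc_t *nva = fm_nva_xcreate(buf, sizeof (buf));
 *	nvlist_t *fixed_nvl = fm_nvlist_create(nva);
 *	...
 *	fm_nvlist_destroy(fixed_nvl, FM_NVA_RETAIN);
 *	fm_nva_xdestroy(nva);
 */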

fm_nvlist_create(nv_alloc_t *nva)
                nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

                if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
                        kmem_free(nvhdl, sizeof (nv_alloc_t));

        if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {

                        nv_alloc_fini(nvhdl);
                        kmem_free(nvhdl, sizeof (nv_alloc_t));

/*
 * Destroy a previously allocated nvlist structure.  flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
fm_nvlist_destroy(nvlist_t *nvl, int flag)
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

                if (flag == FM_NVA_FREE)
                        fm_nva_xdestroy(nva);

i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
        while (ret == 0 && name != NULL) {
                type = va_arg(ap, data_type_t);
                        ret = nvlist_add_byte(payload, name,
                case DATA_TYPE_BYTE_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_byte_array(payload, name,
                            va_arg(ap, uchar_t *), nelem);
                case DATA_TYPE_BOOLEAN_VALUE:
                        ret = nvlist_add_boolean_value(payload, name,
                            va_arg(ap, boolean_t));
                case DATA_TYPE_BOOLEAN_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_boolean_array(payload, name,
                            va_arg(ap, boolean_t *), nelem);
                        ret = nvlist_add_int8(payload, name,
                case DATA_TYPE_INT8_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int8_array(payload, name,
                            va_arg(ap, int8_t *), nelem);
                case DATA_TYPE_UINT8:
                        ret = nvlist_add_uint8(payload, name,
                case DATA_TYPE_UINT8_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint8_array(payload, name,
                            va_arg(ap, uint8_t *), nelem);
                case DATA_TYPE_INT16:
                        ret = nvlist_add_int16(payload, name,
                case DATA_TYPE_INT16_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int16_array(payload, name,
                            va_arg(ap, int16_t *), nelem);
                case DATA_TYPE_UINT16:
                        ret = nvlist_add_uint16(payload, name,
                case DATA_TYPE_UINT16_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint16_array(payload, name,
                            va_arg(ap, uint16_t *), nelem);
                case DATA_TYPE_INT32:
                        ret = nvlist_add_int32(payload, name,
                            va_arg(ap, int32_t));
                case DATA_TYPE_INT32_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int32_array(payload, name,
                            va_arg(ap, int32_t *), nelem);
                case DATA_TYPE_UINT32:
                        ret = nvlist_add_uint32(payload, name,
                            va_arg(ap, uint32_t));
                case DATA_TYPE_UINT32_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint32_array(payload, name,
                            va_arg(ap, uint32_t *), nelem);
                case DATA_TYPE_INT64:
                        ret = nvlist_add_int64(payload, name,
                            va_arg(ap, int64_t));
                case DATA_TYPE_INT64_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_int64_array(payload, name,
                            va_arg(ap, int64_t *), nelem);
                case DATA_TYPE_UINT64:
                        ret = nvlist_add_uint64(payload, name,
                            va_arg(ap, uint64_t));
                case DATA_TYPE_UINT64_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_uint64_array(payload, name,
                            va_arg(ap, uint64_t *), nelem);
                case DATA_TYPE_STRING:
                        ret = nvlist_add_string(payload, name,
                case DATA_TYPE_STRING_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_string_array(payload, name,
                            va_arg(ap, const char **), nelem);
                case DATA_TYPE_NVLIST:
                        ret = nvlist_add_nvlist(payload, name,
                            va_arg(ap, nvlist_t *));
                case DATA_TYPE_NVLIST_ARRAY:
                        nelem = va_arg(ap, int);
                        ret = nvlist_add_nvlist_array(payload, name,
                            va_arg(ap, const nvlist_t **), nelem);

                name = va_arg(ap, char *);

fm_payload_set(nvlist_t *payload, ...)
        va_start(ap, payload);
        name = va_arg(ap, char *);
        ret = i_fm_payload_set(payload, name, ap);

                atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);

/*
 * Set up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload.  Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else.  Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
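
/*
 * Illustrative sketch (not part of the original comment): the varargs list
 * is a NULL-terminated sequence of (name, type, value) triples.  The class
 * and payload member below are hypothetical.
 *
 *	fm_ereport_set(ereport, FM_EREPORT_VERS0, "io.example.fail", ena,
 *	    detector, "size", DATA_TYPE_UINT64, (uint64_t)512, NULL);
 */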

fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
        char ereport_class[FM_MAX_CLASS];

        if (version != FM_EREPORT_VERS0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        (void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
            FM_EREPORT_CLASS, erpt_class);
        if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
            (nvlist_t *)detector) != 0) {
                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

        va_start(ap, detector);
        name = va_arg(ap, const char *);
        ret = i_fm_payload_set(ereport, name, ap);

                atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);

/*
 * Set up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50
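
/*
 * Illustrative sketch (not part of the original comment): building an hc
 * fmri from two hypothetical name/instance pairs.
 *
 *	fm_fmri_hc_set(fmri, FM_HC_SCHEME_VERSION, auth, NULL, 2,
 *	    "motherboard", 0, "cpu", 1);
 */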

fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
        if (version != FM_HC_SCHEME_VERSION) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
            nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
            (nvlist_t *)auth) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
        nvlist_t *pairs[HC_MAXPAIRS];

        if (!fm_fmri_hc_set_common(fmri, version, auth))

        npairs = MIN(npairs, HC_MAXPAIRS);

        va_start(ap, npairs);
        for (i = 0; i < npairs; i++) {
                const char *name = va_arg(ap, const char *);
                uint32_t id = va_arg(ap, uint32_t);

                (void) snprintf(idstr, sizeof (idstr), "%u", id);

                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
            (const nvlist_t **)pairs, npairs) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        for (i = 0; i < npairs; i++)
                fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

                if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
        nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
        nvlist_t *pairs[HC_MAXPAIRS];
        const char *hcname, *hcid;

        if (!fm_fmri_hc_set_common(fmri, version, auth))

        /*
         * copy the bboard nvpairs to the pairs array
         */
        if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        for (i = 0; i < n; i++) {
                if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

                if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
                        for (j = 0; j <= i; j++) {
                                if (pairs[j] != NULL)
                                        fm_nvlist_destroy(pairs[j],
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

        /*
         * create the pairs from passed in pairs
         */
        npairs = MIN(npairs, HC_MAXPAIRS);

        va_start(ap, npairs);
        for (i = n; i < npairs + n; i++) {
                const char *name = va_arg(ap, const char *);
                uint32_t id = va_arg(ap, uint32_t);

                (void) snprintf(idstr, sizeof (idstr), "%u", id);
                pairs[i] = fm_nvlist_create(nva);
                if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
                    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
                        for (j = 0; j <= i; j++) {
                                if (pairs[j] != NULL)
                                        fm_nvlist_destroy(pairs[j],
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

        /*
         * Create the fmri hc list
         */
        if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
            (const nvlist_t **)pairs, npairs + n) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        for (i = 0; i < npairs + n; i++) {
                fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

                if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

/*
 * Set up and validate the members of a dev fmri according to:
 *
 *	Member name			Type		Value
 *	====================================================
 *	auth				nvlist_t	<auth>
 *	devpath				string		<devpath>
 *	[devid]				string		<devid>
 *	[target-port-l0id]		string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
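
/*
 * Illustrative sketch (not part of the original comment): the device path
 * and devid strings below are hypothetical placeholders.
 *
 *	fm_fmri_dev_set(fmri, DEV_SCHEME_VERSION0, NULL,
 *	    "/pci@0,0/disk@0", "id1,sd@n5000c500deadbeef", NULL);
 */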

fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
        if (version != DEV_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
        err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

                err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,

        err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

                err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

                err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

/*
 * Set up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			string		<serial_id>
 *
 * Note that auth, cpumask, and serial are optional members.
 */
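
/*
 * Illustrative sketch (not part of the original comment): the cpu id and
 * serial string below are hypothetical placeholders.
 *
 *	fm_fmri_cpu_set(fmri, CPU_SCHEME_VERSION1, NULL, 3, NULL, "SN-1234");
 */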

fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
        uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

        if (version < CPU_SCHEME_VERSION1) {
                atomic_inc_64(failedp);

        if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
                atomic_inc_64(failedp);

        if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
            FM_FMRI_SCHEME_CPU) != 0) {
                atomic_inc_64(failedp);

        if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
            (nvlist_t *)auth) != 0)
                atomic_inc_64(failedp);

        if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
                atomic_inc_64(failedp);

        if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
                atomic_inc_64(failedp);

        if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
            FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
                atomic_inc_64(failedp);

/*
 * Set up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
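
/*
 * Illustrative sketch (not part of the original comment): the unum, serial
 * and offset values below are hypothetical placeholders.
 *
 *	fm_fmri_mem_set(fmri, MEM_SCHEME_VERSION0, NULL,
 *	    "DIMM0", "SN-5678", 0x1000);
 */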

fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
        if (version != MEM_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (!serial && (offset != (uint64_t)-1)) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

                if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
                    (nvlist_t *)auth) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (serial != NULL) {
                if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
                    (const char **)&serial, 1) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

                if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
                    FM_FMRI_MEM_OFFSET, offset) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
        if (version != ZFS_SCHEME_VERSION0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
                atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

        if (vdev_guid != 0) {
                if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
                            &erpt_kstat_data.fmri_set_failed.value.ui64);

fm_ena_increment(uint64_t ena)
        switch (ENA_FORMAT(ena)) {
                new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
                new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);

fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
                            ((timestamp << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));

                        ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                            ((cpuid << ENA_FMT1_CPUID_SHFT) &
                            ENA_FMT1_CPUID_MASK) |
                            ((gethrtime() << ENA_FMT1_TIME_SHFT) &
                            ENA_FMT1_TIME_MASK));

                ena = (uint64_t)((format & ENA_FORMAT_MASK) |
                    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));

fm_ena_generate(uint64_t timestamp, uchar_t format)
        ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);

fm_ena_generation_get(uint64_t ena)
        switch (ENA_FORMAT(ena)) {
                gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
                gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;

fm_ena_format_get(uint64_t ena)
        return (ENA_FORMAT(ena));

fm_ena_id_get(uint64_t ena)
        switch (ENA_FORMAT(ena)) {
                id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
                id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;

fm_ena_time_get(uint64_t ena)
        switch (ENA_FORMAT(ena)) {
                time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
                time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;

/*
 * Helper function to increment ereport dropped count.  Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
fm_erpt_dropped_increment(void)
        atomic_inc_64(&ratelimit_dropped);

        /* Initialize zevent allocation and generation kstats */
        fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
            sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);

        if (fm_ksp != NULL) {
                fm_ksp->ks_data = &erpt_kstat_data;
                kstat_install(fm_ksp);
                cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");

        mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
        list_create(&zevent_list, sizeof (zevent_t),
            offsetof(zevent_t, ev_node));
        cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

        zfs_zevent_drain_all(&count);

        mutex_enter(&zevent_lock);
        cv_broadcast(&zevent_cv);

        zevent_flags |= ZEVENT_SHUTDOWN;
        while (zevent_waiters > 0) {
                mutex_exit(&zevent_lock);
                kpreempt(KPREEMPT_SYNC);
                mutex_enter(&zevent_lock);

        mutex_exit(&zevent_lock);

        cv_destroy(&zevent_cv);
        list_destroy(&zevent_list);
        mutex_destroy(&zevent_lock);

        if (fm_ksp != NULL) {
                kstat_delete(fm_ksp);

#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
        "Max event queue length");