/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

26 #include <sys/sysevent/eventdefs.h>
27 #include <sys/sysevent.h>
28 #include <sys/sysevent_impl.h>
29 #include <sys/fm/protocol.h>
30 #include <sys/sysmacros.h>
31 #include <sys/dumphdr.h>
32 #include <sys/dumpadm.h>
33 #include <sys/fm/util.h>
35 #include <libsysevent.h>
36 #include <libnvpair.h>
52 #include <fmd_dispq.h>
54 #include <fmd_module.h>
55 #include <fmd_protocol.h>
56 #include <fmd_scheme.h>
57 #include <fmd_error.h>
61 static char *sysev_channel
; /* event channel to which we are subscribed */
62 static char *sysev_class
; /* event class to which we are subscribed */
63 static char *sysev_device
; /* device path to use for replaying events */
64 static char *sysev_sid
; /* event channel subscriber identifier */
65 static void *sysev_evc
; /* event channel cookie from evc_bind */
67 static fmd_xprt_t
*sysev_xprt
;
68 static int sysev_xprt_refcnt
;
69 static fmd_hdl_t
*sysev_hdl
;
71 static struct sysev_stats
{
72 fmd_stat_t dump_replay
;
78 { "dump_replay", FMD_TYPE_UINT64
, "events replayed from dump device" },
79 { "dump_lost", FMD_TYPE_UINT64
, "events lost from dump device" },
80 { "bad_class", FMD_TYPE_UINT64
, "events dropped due to invalid class" },
81 { "bad_attr", FMD_TYPE_UINT64
, "events dropped due to invalid nvlist" },
82 { "eagain", FMD_TYPE_UINT64
, "events retried due to low memory" },
85 static pthread_cond_t sysev_cv
= PTHREAD_COND_INITIALIZER
;
86 static pthread_mutex_t sysev_mutex
= PTHREAD_MUTEX_INITIALIZER
;
87 static int sysev_replay_wait
= 1;
88 static int sysev_exiting
;
90 static sysevent_subattr_t
*subattr
;
93 * Entry point for legacy sysevents. This function is responsible for two
94 * things: passing off interesting events to the DR handler, and converting
95 * sysevents into resource events that modules can then subscribe to.
98 sysev_legacy(sysevent_t
*sep
)
100 const char *class = sysevent_get_class_name(sep
);
101 const char *subclass
= sysevent_get_subclass_name(sep
);
104 nvlist_t
*attr
, *nvl
;
107 /* notify the DR subsystem of the event */
110 /* get the matching sysevent name */
111 len
= snprintf(NULL
, 0, "%s%s.%s", SYSEVENT_RSRC_CLASS
,
113 fullclass
= alloca(len
+ 1);
114 (void) snprintf(fullclass
, len
+ 1, "%s%s.%s",
115 SYSEVENT_RSRC_CLASS
, class, subclass
);
117 /* construct the event payload */
118 (void) nvlist_xalloc(&nvl
, NV_UNIQUE_NAME
, &fmd
.d_nva
);
119 if (sysevent_get_attr_list(sep
, &attr
) == 0) {
120 (void) nvlist_merge(nvl
, attr
, 0);
125 * Add class and version after the nvlist_merge() just in case
126 * the sysevent has an attribute called class or version.
128 (void) nvlist_add_string(nvl
, FM_CLASS
, fullclass
);
129 (void) nvlist_add_uint8(nvl
, FM_VERSION
, FM_RSRC_VERSION
);
132 * Dispatch the event. Because we have used sysevent_bind_xhandle
133 * the delivery thread is blessed as a proper fmd thread so
134 * we may use regular fmd api calls.
136 sysevent_get_time(sep
, &hrt
);
137 fmd_xprt_post(sysev_hdl
, sysev_xprt
, nvl
, hrt
);
141 * Receive an event from the SysEvent channel and post it to our transport.
142 * Under extreme low-memory situations where we cannot event unpack the event,
143 * we can request that SysEvent redeliver the event later by returning EAGAIN.
144 * If we do this too many times, the kernel will drop the event. Rather than
145 * keeping state per-event, we simply attempt a garbage-collect, hoping that
146 * enough free memory will be available by the time the event is redelivered.
149 sysev_recv(sysevent_t
*sep
, void *arg
)
151 uint64_t seq
= sysevent_get_seq(sep
);
152 fmd_xprt_t
*xp
= arg
;
157 (void) pthread_mutex_lock(&sysev_mutex
);
158 if (sysev_exiting
== 1) {
159 while (sysev_xprt_refcnt
> 0)
160 (void) pthread_cond_wait(&sysev_cv
, &sysev_mutex
);
161 (void) pthread_mutex_unlock(&sysev_mutex
);
165 while (sysev_replay_wait
)
166 (void) pthread_cond_wait(&sysev_cv
, &sysev_mutex
);
167 (void) pthread_mutex_unlock(&sysev_mutex
);
169 if (strcmp(sysevent_get_class_name(sep
), EC_FM
) != 0) {
170 fmd_hdl_error(sysev_hdl
, "discarding event 0x%llx: unexpected"
171 " transport class %s\n", seq
, sysevent_get_class_name(sep
));
172 sysev_stats
.bad_class
.fmds_value
.ui64
++;
173 } else if (sysevent_get_attr_list(sep
, &nvl
) != 0) {
174 if (errno
== EAGAIN
|| errno
== ENOMEM
) {
175 fmd_modhash_tryapply(fmd
.d_mod_hash
, fmd_module_trygc
);
176 fmd_scheme_hash_trygc(fmd
.d_schemes
);
177 sysev_stats
.eagain
.fmds_value
.ui64
++;
180 fmd_hdl_error(sysev_hdl
, "discarding event 0x%llx: "
181 "missing or invalid payload", seq
);
182 sysev_stats
.bad_attr
.fmds_value
.ui64
++;
185 sysevent_get_time(sep
, &hrt
);
186 fmd_xprt_post(sysev_hdl
, xp
, nvl
, hrt
);
189 (void) pthread_mutex_lock(&sysev_mutex
);
190 if (--sysev_xprt_refcnt
== 0 && sysev_exiting
== 1)
191 (void) pthread_cond_broadcast(&sysev_cv
);
192 (void) pthread_mutex_unlock(&sysev_mutex
);
/*
 * Checksum algorithm used by the dump transport for verifying the content of
 * error reports saved on the dump device (copy of the kernel's checksum32()).
 *
 * For each of the 'length' bytes starting at 'cp_arg', the running 32-bit sum
 * is rotated right by one bit and the byte value is added to it.  An empty
 * buffer yields 0.  The buffer is only read, never modified.
 */
static uint32_t
sysev_checksum(const void *cp_arg, size_t length)
{
	const unsigned char *cp, *ep;
	uint32_t sum = 0;

	for (cp = cp_arg, ep = cp + length; cp < ep; cp++)
		sum = ((sum >> 1) | (sum << 31)) + *cp;

	return (sum);
}

214 * Replay saved events from the dump transport. This function is installed as
215 * the timer callback and is called only once during the module's lifetime.
219 sysev_replay(fmd_hdl_t
*hdl
, id_t id
, void *arg
)
226 * Determine the appropriate dump device to use for replaying pending
227 * error reports. If the device property is NULL (default), we
228 * open and query /dev/dump to determine the current dump device.
230 if ((dumpdev
= sysev_device
) == NULL
) {
231 if ((fd
= open("/dev/dump", O_RDONLY
)) == -1) {
232 fmd_hdl_error(hdl
, "failed to open /dev/dump "
233 "to locate dump device for event replay");
237 dumpdev
= alloca(PATH_MAX
);
238 err
= ioctl(fd
, DIOCGETDEV
, dumpdev
);
242 if (errno
!= ENODEV
) {
243 fmd_hdl_error(hdl
, "failed to obtain "
244 "path to dump device for event replay");
250 if (strcmp(dumpdev
, "/dev/null") == 0)
251 goto done
; /* return silently and skip replay for /dev/null */
254 * Open the appropriate device and then determine the offset of the
255 * start of the ereport dump region located at the end of the device.
257 if ((fd
= open64(dumpdev
, O_RDWR
| O_DSYNC
)) == -1) {
258 fmd_hdl_error(hdl
, "failed to open dump transport %s "
259 "(pending events will not be replayed)", dumpdev
);
263 off
= DUMP_OFFSET
+ DUMP_LOGSIZE
+ DUMP_ERPTSIZE
;
264 off
= off0
= lseek64(fd
, -off
, SEEK_END
) & -DUMP_OFFSET
;
266 if (off
== (off64_t
)-1LL) {
267 fmd_hdl_error(hdl
, "failed to seek dump transport %s "
268 "(pending events will not be replayed)", dumpdev
);
274 * The ereport dump region is a sequence of erpt_dump_t headers each of
275 * which is followed by packed nvlist data. We iterate over them in
276 * order, unpacking and dispatching each one to our dispatch queue.
279 char nvbuf
[ERPT_DATA_SZ
];
284 fmd_timeval_t ftv
, tod
;
288 if (pread64(fd
, &ed
, sizeof (ed
), off
) != sizeof (ed
)) {
289 fmd_hdl_error(hdl
, "failed to read from dump "
290 "transport %s (pending events lost)", dumpdev
);
294 if (ed
.ed_magic
== 0 && ed
.ed_size
== 0)
295 break; /* end of list: all zero */
297 if (ed
.ed_magic
== 0) {
298 off
+= sizeof (ed
) + ed
.ed_size
;
299 continue; /* continue searching */
302 if (ed
.ed_magic
!= ERPT_MAGIC
) {
304 * Stop reading silently if the first record has the
305 * wrong magic number; this likely indicates that we
306 * rebooted from non-FMA bits or paged over the dump.
311 fmd_hdl_error(hdl
, "invalid dump transport "
312 "record at %llx (magic number %x, expected %x)\n",
313 (u_longlong_t
)off
, ed
.ed_magic
, ERPT_MAGIC
);
317 if (ed
.ed_size
> ERPT_DATA_SZ
) {
318 fmd_hdl_error(hdl
, "invalid dump transport "
319 "record at %llx size (%u exceeds limit)\n",
320 (u_longlong_t
)off
, ed
.ed_size
);
324 if (pread64(fd
, nvbuf
, ed
.ed_size
,
325 off
+ sizeof (ed
)) != ed
.ed_size
) {
326 fmd_hdl_error(hdl
, "failed to read dump "
327 "transport event (offset %llx)", (u_longlong_t
)off
);
329 sysev_stats
.dump_lost
.fmds_value
.ui64
++;
333 if ((chksum
= sysev_checksum(nvbuf
,
334 ed
.ed_size
)) != ed
.ed_chksum
) {
335 fmd_hdl_error(hdl
, "dump transport event at "
336 "offset %llx is corrupt (checksum %x != %x)\n",
337 (u_longlong_t
)off
, chksum
, ed
.ed_chksum
);
339 sysev_stats
.dump_lost
.fmds_value
.ui64
++;
343 if ((err
= nvlist_xunpack(nvbuf
,
344 ed
.ed_size
, &nvl
, &fmd
.d_nva
)) != 0) {
345 fmd_hdl_error(hdl
, "failed to unpack dump "
346 "transport event at offset %llx: %s\n",
347 (u_longlong_t
)off
, fmd_strerror(err
));
349 sysev_stats
.dump_lost
.fmds_value
.ui64
++;
354 * If ed_hrt_nsec is set it contains the gethrtime() value from
355 * when the event was originally enqueued for the transport.
356 * If it is zero, we use the weaker bound ed_hrt_base instead.
358 if (ed
.ed_hrt_nsec
!= 0)
359 hrt
= ed
.ed_hrt_nsec
;
361 hrt
= ed
.ed_hrt_base
;
364 * If this is an FMA protocol event of class "ereport.*" that
365 * contains valid ENA, we can improve the precision of 'hrt'.
367 if (nvlist_lookup_uint64(nvl
, FM_EREPORT_ENA
, &ena
) == 0)
368 hrt
= fmd_time_ena2hrt(hrt
, ena
);
371 * Now convert 'hrt' to an adjustable TOD based on the values
372 * in ed_tod_base which correspond to one another and are
373 * sampled before reboot using the old gethrtime() clock.
374 * fmd_event_recreate() will use this TOD value to re-assign
375 * the event an updated gethrtime() value based on the current
376 * value of the non-adjustable gethrtime() clock. Phew.
378 tod
.ftv_sec
= ed
.ed_tod_base
.sec
;
379 tod
.ftv_nsec
= ed
.ed_tod_base
.nsec
;
380 fmd_time_hrt2tod(ed
.ed_hrt_base
, &tod
, hrt
, &ftv
);
382 (void) nvlist_remove_all(nvl
, FMD_EVN_TOD
);
383 (void) nvlist_add_uint64_array(nvl
,
384 FMD_EVN_TOD
, (uint64_t *)&ftv
, 2);
386 fmd_xprt_post(hdl
, sysev_xprt
, nvl
, 0);
387 sysev_stats
.dump_replay
.fmds_value
.ui64
++;
391 * Reset the magic number for the event record to zero so that
392 * we do not replay the same event multiple times.
396 if (pwrite64(fd
, &ed
, sizeof (ed
), off
) != sizeof (ed
)) {
397 fmd_hdl_error(hdl
, "failed to mark dump "
398 "transport event (offset %llx)", (u_longlong_t
)off
);
401 off
+= sizeof (ed
) + ed
.ed_size
;
406 (void) pthread_mutex_lock(&sysev_mutex
);
407 sysev_replay_wait
= 0;
408 (void) pthread_cond_broadcast(&sysev_cv
);
409 (void) pthread_mutex_unlock(&sysev_mutex
);
412 static const fmd_prop_t sysev_props
[] = {
413 { "class", FMD_TYPE_STRING
, EC_ALL
}, /* event class */
414 { "device", FMD_TYPE_STRING
, NULL
}, /* replay device */
415 { "channel", FMD_TYPE_STRING
, FM_ERROR_CHAN
}, /* channel name */
416 { "sid", FMD_TYPE_STRING
, "fmd" }, /* subscriber id */
420 static const fmd_hdl_ops_t sysev_ops
= {
421 NULL
, /* fmdo_recv */
422 sysev_replay
, /* fmdo_timeout */
423 NULL
, /* fmdo_close */
424 NULL
, /* fmdo_stats */
426 NULL
, /* fmdo_send */
429 static const fmd_hdl_info_t sysev_info
= {
430 "SysEvent Transport Agent", "1.0", &sysev_ops
, sysev_props
434 * Bind to the sysevent channel we use for listening for error events and then
435 * subscribe to appropriate events received over this channel. Setup the
436 * legacy sysevent handler for creating sysevent resources and forwarding DR
440 sysev_init(fmd_hdl_t
*hdl
)
443 const char *subclasses
[] = { EC_SUB_ALL
};
445 /* This builtin is for the global zone only */
446 if (getzoneid() != GLOBAL_ZONEID
)
449 if (fmd_hdl_register(hdl
, FMD_API_VERSION
, &sysev_info
) != 0)
450 return; /* invalid property settings */
452 (void) fmd_stat_create(hdl
, FMD_STAT_NOALLOC
, sizeof (sysev_stats
) /
453 sizeof (fmd_stat_t
), (fmd_stat_t
*)&sysev_stats
);
455 sysev_channel
= fmd_prop_get_string(hdl
, "channel");
456 sysev_class
= fmd_prop_get_string(hdl
, "class");
457 sysev_device
= fmd_prop_get_string(hdl
, "device");
458 sysev_sid
= fmd_prop_get_string(hdl
, "sid");
460 if (sysev_channel
== NULL
)
461 fmd_hdl_abort(hdl
, "channel property must be defined\n");
463 if (sysev_sid
== NULL
)
464 fmd_hdl_abort(hdl
, "sid property must be defined\n");
466 if ((errno
= sysevent_evc_bind(sysev_channel
, &sysev_evc
,
467 EVCH_CREAT
| EVCH_HOLD_PEND
)) != 0) {
468 fmd_hdl_abort(hdl
, "failed to bind to event transport "
469 "channel %s", sysev_channel
);
472 sysev_xprt
= fmd_xprt_open(hdl
, FMD_XPRT_RDONLY
|
473 FMD_XPRT_CACHE_AS_LOCAL
, NULL
, NULL
);
477 * If we're subscribing to the default channel, keep our subscription
478 * active even if we die unexpectedly so we continue queuing events.
479 * If we're not (e.g. running under fmsim), do not specify SUB_KEEP so
480 * that our event channel will be destroyed if we die unpleasantly.
482 if (strcmp(sysev_channel
, FM_ERROR_CHAN
) == 0)
483 flags
= EVCH_SUB_KEEP
| EVCH_SUB_DUMP
;
485 flags
= EVCH_SUB_DUMP
;
487 if ((subattr
= sysevent_subattr_alloc()) == NULL
)
488 fmd_hdl_abort(hdl
, "failed to allocate subscription "
491 sysevent_subattr_thrcreate(subattr
, fmd_doorthr_create
, NULL
);
492 sysevent_subattr_thrsetup(subattr
, fmd_doorthr_setup
, NULL
);
494 errno
= sysevent_evc_xsubscribe(sysev_evc
,
495 sysev_sid
, sysev_class
, sysev_recv
, sysev_xprt
, flags
, subattr
);
498 if (errno
== EEXIST
) {
499 fmd_hdl_abort(hdl
, "another fault management daemon is "
500 "active on transport channel %s\n", sysev_channel
);
502 fmd_hdl_abort(hdl
, "failed to xsubscribe to %s on "
503 "transport channel %s", sysev_class
, sysev_channel
);
508 * Once the transport is open, install a single timer to fire at once
509 * in the context of the module's thread to run sysev_replay(). This
510 * thread will block in its first fmd_xprt_post() until fmd is ready.
512 fmd_hdl_debug(hdl
, "transport '%s' open\n", sysev_channel
);
513 (void) fmd_timer_install(hdl
, NULL
, NULL
, 0);
516 * Open the legacy sysevent handle and subscribe to all events. These
517 * are automatically converted to "resource.sysevent.*" events so that
518 * modules can manage these events without additional infrastructure.
523 if ((fmd
.d_sysev_hdl
=
524 sysevent_bind_xhandle(sysev_legacy
, subattr
)) == NULL
)
525 fmd_hdl_abort(hdl
, "failed to bind to legacy sysevent channel");
527 if (sysevent_subscribe_event(fmd
.d_sysev_hdl
, EC_ALL
,
529 fmd_hdl_abort(hdl
, "failed to subscribe to legacy sysevents");
533 * Close the channel by unsubscribing and unbinding. We only do this when a
534 * a non-default channel has been selected. If we're using FM_ERROR_CHAN,
535 * the system default, we do *not* want to unsubscribe because the kernel will
536 * remove the subscriber queue and any events published in our absence will
537 * therefore be lost. This scenario may occur when, for example, fmd is sent
538 * a SIGTERM by init(1M) during reboot but an error is detected and makes it
539 * into the sysevent channel queue before init(1M) manages to call uadmin(2).
542 sysev_fini(fmd_hdl_t
*hdl
)
544 if (strcmp(sysev_channel
, FM_ERROR_CHAN
) != 0) {
545 (void) sysevent_evc_unsubscribe(sysev_evc
, sysev_sid
);
546 (void) sysevent_evc_unbind(sysev_evc
);
549 if (fmd
.d_sysev_hdl
!= NULL
)
550 sysevent_unbind_handle(fmd
.d_sysev_hdl
);
552 if (subattr
!= NULL
) {
553 sysevent_subattr_free(subattr
);
557 if (sysev_xprt
!= NULL
) {
559 * Wait callback returns before destroy the transport.
561 (void) pthread_mutex_lock(&sysev_mutex
);
563 while (sysev_xprt_refcnt
> 0)
564 (void) pthread_cond_wait(&sysev_cv
, &sysev_mutex
);
565 (void) pthread_mutex_unlock(&sysev_mutex
);
566 fmd_xprt_close(hdl
, sysev_xprt
);
569 fmd_prop_free_string(hdl
, sysev_class
);
570 fmd_prop_free_string(hdl
, sysev_channel
);
571 fmd_prop_free_string(hdl
, sysev_device
);
572 fmd_prop_free_string(hdl
, sysev_sid
);