4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/utsname.h>
28 #include <sys/param.h>
29 #include <sys/systeminfo.h>
30 #include <sys/fm/util.h>
42 #include <fmd_dispq.h>
43 #include <fmd_timerq.h>
45 #include <fmd_error.h>
46 #include <fmd_module.h>
47 #include <fmd_thread.h>
48 #include <fmd_alloc.h>
49 #include <fmd_string.h>
50 #include <fmd_builtin.h>
51 #include <fmd_ustat.h>
52 #include <fmd_protocol.h>
53 #include <fmd_scheme.h>
57 #include <fmd_idspace.h>
63 #include <sys/openpromio.h>
64 #include <libdevinfo.h>
68 extern const nv_alloc_ops_t fmd_nv_alloc_ops
; /* see fmd_nv.c */
70 const char _fmd_version
[] = "1.2"; /* daemon version string */
71 static char _fmd_plat
[MAXNAMELEN
]; /* native platform string */
72 static char _fmd_isa
[MAXNAMELEN
]; /* native instruction set */
73 static struct utsname _fmd_uts
; /* native uname(2) info */
74 static char _fmd_psn
[MAXNAMELEN
]; /* product serial number */
75 static char _fmd_csn
[MAXNAMELEN
]; /* chassis serial number */
76 static char _fmd_prod
[MAXNAMELEN
]; /* product name string */
/*
 * Search paths used as the defaults for the corresponding properties.
 *
 * The configuration file path runs from the most common location to the
 * most host-specific one, because each conf file that is found is merged
 * over (and may override) the ones before it.  The module paths run the
 * other way, from most specific to most common, because once a module has
 * been loaded fmd will not load another module over the same name.
 */
static const char _fmd_conf_path[] =
	"%r/usr/lib/fm/fmd:"
	"%r/usr/platform/%m/lib/fm/fmd:"
	"%r/usr/platform/%i/lib/fm/fmd:"
	"%r/etc/fm/fmd";

static const char _fmd_agent_path[] =
	"%r/usr/platform/%i/lib/fm/fmd/agents:"
	"%r/usr/platform/%m/lib/fm/fmd/agents:"
	"%r/usr/lib/fm/fmd/agents";

static const char _fmd_plugin_path[] =
	"%r/usr/platform/%i/lib/fm/fmd/plugins:"
	"%r/usr/platform/%m/lib/fm/fmd/plugins:"
	"%r/usr/lib/fm/fmd/plugins";

static const char _fmd_scheme_path[] =
	"usr/lib/fm/fmd/schemes";
104 static const fmd_conf_mode_t _fmd_cerror_modes
[] = {
105 { "unload", "unload offending client module", FMD_CERROR_UNLOAD
},
106 { "stop", "stop daemon for debugger attach", FMD_CERROR_STOP
},
107 { "abort", "abort daemon and force core dump", FMD_CERROR_ABORT
},
111 static const fmd_conf_mode_t _fmd_dbout_modes
[] = {
112 { "stderr", "send debug messages to stderr", FMD_DBOUT_STDERR
},
113 { "syslog", "send debug messages to syslog", FMD_DBOUT_SYSLOG
},
117 static const fmd_conf_mode_t _fmd_debug_modes
[] = {
118 { "help", "display debugging modes and exit", FMD_DBG_HELP
},
119 { "mod", "debug module load/unload/locking", FMD_DBG_MOD
},
120 { "disp", "debug dispatch queue processing", FMD_DBG_DISP
},
121 { "xprt", "debug transport-specific routines", FMD_DBG_XPRT
},
122 { "evt", "debug event subsystem routines", FMD_DBG_EVT
},
123 { "log", "debug log subsystem routines", FMD_DBG_LOG
},
124 { "tmr", "debug timer subsystem routines", FMD_DBG_TMR
},
125 { "fmri", "debug fmri subsystem routines", FMD_DBG_FMRI
},
126 { "asru", "debug asru subsystem routines", FMD_DBG_ASRU
},
127 { "case", "debug case subsystem routines", FMD_DBG_CASE
},
128 { "ckpt", "debug checkpoint routines", FMD_DBG_CKPT
},
129 { "rpc", "debug rpc service routines", FMD_DBG_RPC
},
130 { "trace", "display matching trace calls", FMD_DBG_TRACE
},
131 { "all", "enable all available debug modes", FMD_DBG_ALL
},
136 fmd_cerror_set(fmd_conf_param_t
*pp
, const char *value
)
138 return (fmd_conf_mode_set(_fmd_cerror_modes
, pp
, value
));
142 fmd_dbout_set(fmd_conf_param_t
*pp
, const char *value
)
144 return (fmd_conf_mode_set(_fmd_dbout_modes
, pp
, value
));
148 fmd_debug_set(fmd_conf_param_t
*pp
, const char *value
)
150 int err
= fmd_conf_mode_set(_fmd_debug_modes
, pp
, value
);
153 fmd
.d_fmd_debug
= pp
->cp_value
.cpv_num
;
159 fmd_trmode_set(fmd_conf_param_t
*pp
, const char *value
)
161 fmd_tracebuf_f
*func
;
163 if (strcasecmp(value
, "none") == 0)
164 func
= fmd_trace_none
;
165 else if (strcasecmp(value
, "lite") == 0)
166 func
= fmd_trace_lite
;
167 else if (strcasecmp(value
, "full") == 0)
168 func
= fmd_trace_full
;
170 return (fmd_set_errno(EFMD_CONF_INVAL
));
172 fmd
.d_thr_trace
= (void (*)())func
;
173 pp
->cp_value
.cpv_ptr
= (void *)func
;
178 fmd_trmode_get(const fmd_conf_param_t
*pp
, void *ptr
)
180 *((void **)ptr
) = pp
->cp_value
.cpv_ptr
;
184 fmd_clkmode_set(fmd_conf_param_t
*pp
, const char *value
)
186 const fmd_timeops_t
*ops
;
188 if (strcasecmp(value
, "native") == 0)
189 ops
= &fmd_timeops_native
;
190 else if (strcasecmp(value
, "simulated") == 0)
191 ops
= &fmd_timeops_simulated
;
193 return (fmd_set_errno(EFMD_CONF_INVAL
));
195 fmd
.d_clockops
= ops
;
196 pp
->cp_value
.cpv_ptr
= (void *)ops
;
201 fmd_clkmode_get(const fmd_conf_param_t
*pp
, void *ptr
)
203 *((void **)ptr
) = pp
->cp_value
.cpv_ptr
;
206 static const fmd_conf_ops_t fmd_cerror_ops
= {
207 fmd_cerror_set
, fmd_conf_mode_get
, fmd_conf_notsup
, fmd_conf_nop
210 static const fmd_conf_ops_t fmd_dbout_ops
= {
211 fmd_dbout_set
, fmd_conf_mode_get
, fmd_conf_notsup
, fmd_conf_nop
214 static const fmd_conf_ops_t fmd_debug_ops
= {
215 fmd_debug_set
, fmd_conf_mode_get
, fmd_conf_notsup
, fmd_conf_nop
218 static const fmd_conf_ops_t fmd_trmode_ops
= {
219 fmd_trmode_set
, fmd_trmode_get
, fmd_conf_notsup
, fmd_conf_nop
222 static const fmd_conf_ops_t fmd_clkmode_ops
= {
223 fmd_clkmode_set
, fmd_clkmode_get
, fmd_conf_notsup
, fmd_conf_nop
226 static const fmd_conf_formal_t _fmd_conf
[] = {
227 { "agent.path", &fmd_conf_path
, _fmd_agent_path
}, /* path for agents */
228 { "alloc_msecs", &fmd_conf_uint32
, "10" }, /* msecs before alloc retry */
229 { "alloc_tries", &fmd_conf_uint32
, "3" }, /* max # of alloc retries */
230 { "product_sn", &fmd_conf_string
, _fmd_psn
}, /* product serial number */
231 { "chassis", &fmd_conf_string
, _fmd_csn
}, /* chassis serial number */
232 { "ckpt.dir", &fmd_conf_string
, "var/fm/fmd/ckpt" }, /* ckpt directory path */
233 { "ckpt.dirmode", &fmd_conf_int32
, "0755" }, /* ckpt directory perm mode */
234 { "ckpt.mode", &fmd_conf_int32
, "0644" }, /* ckpt file perm mode */
235 { "ckpt.restore", &fmd_conf_bool
, "true" }, /* restore checkpoints? */
236 { "ckpt.save", &fmd_conf_bool
, "true" }, /* save checkpoints? */
237 { "ckpt.zero", &fmd_conf_bool
, "false" }, /* zero checkpoints on start? */
238 { "client.buflim", &fmd_conf_size
, "10m" }, /* client buffer space limit */
239 { "client.dbout", &fmd_dbout_ops
, NULL
}, /* client debug output sinks */
240 { "client.debug", &fmd_conf_bool
, NULL
}, /* client debug enable */
241 { "client.doorthrlim", &fmd_conf_uint32
, "20" }, /* client door thread limit */
242 { "client.error", &fmd_cerror_ops
, "unload" }, /* client error policy */
243 { "client.memlim", &fmd_conf_size
, "10m" }, /* client allocation limit */
244 { "client.evqlim", &fmd_conf_uint32
, "256" }, /* client event queue limit */
245 { "client.thrlim", &fmd_conf_uint32
, "20" }, /* client aux thread limit */
246 { "client.thrsig", &fmd_conf_signal
, "SIGUSR1" }, /* fmd_thr_signal() value */
247 { "client.tmrlim", &fmd_conf_uint32
, "1024" }, /* client pending timer limit */
248 { "client.xprtlim", &fmd_conf_uint32
, "256" }, /* client transport limit */
249 { "client.xprtlog", &fmd_conf_bool
, NULL
}, /* client transport logging? */
250 { "client.xprtqlim", &fmd_conf_uint32
, "1024" }, /* client transport queue li */
251 { "clock", &fmd_clkmode_ops
, "native" }, /* clock operation mode */
252 { "conf_path", &fmd_conf_path
, _fmd_conf_path
}, /* root config file path */
253 { "conf_file", &fmd_conf_string
, "fmd.conf" }, /* root config file name */
254 { "core", &fmd_conf_bool
, "false" }, /* force core dump on quit */
255 { "dbout", &fmd_dbout_ops
, NULL
}, /* daemon debug output sinks */
256 { "debug", &fmd_debug_ops
, NULL
}, /* daemon debugging flags */
257 { "dictdir", &fmd_conf_string
, "usr/lib/fm/dict" }, /* default diagcode dir */
258 { "domain", &fmd_conf_string
, NULL
}, /* domain id for de auth */
259 { "fakenotpresent", &fmd_conf_uint32
, "0" }, /* simulate rsrc not present */
260 { "fg", &fmd_conf_bool
, "false" }, /* run daemon in foreground */
261 { "gc_interval", &fmd_conf_time
, "1d" }, /* garbage collection intvl */
262 { "ids.avg", &fmd_conf_uint32
, "4" }, /* desired idspace chain len */
263 { "ids.max", &fmd_conf_uint32
, "1024" }, /* maximum idspace buckets */
264 { "isaname", &fmd_conf_string
, _fmd_isa
}, /* instruction set (uname -p) */
265 { "log.creator", &fmd_conf_string
, "fmd" }, /* exacct log creator string */
266 { "log.error", &fmd_conf_string
, "var/fm/fmd/errlog" }, /* error log path */
267 { "log.fault", &fmd_conf_string
, "var/fm/fmd/fltlog" }, /* fault log path */
268 { "log.info", &fmd_conf_string
, "var/fm/fmd/infolog" }, /* info log path */
269 { "log.info_hival", &fmd_conf_string
, "var/fm/fmd/infolog_hival" }, /* hi pri */
270 { "log.minfree", &fmd_conf_size
, "2m" }, /* min log fsys free space */
271 { "log.rsrc", &fmd_conf_string
, "var/fm/fmd/rsrc" }, /* asru log dir path */
272 { "log.tryrotate", &fmd_conf_uint32
, "10" }, /* max log rotation attempts */
273 { "log.waitrotate", &fmd_conf_time
, "200ms" }, /* log rotation retry delay */
274 { "log.xprt", &fmd_conf_string
, "var/fm/fmd/xprt" }, /* transport log dir */
275 { "machine", &fmd_conf_string
, _fmd_uts
.machine
}, /* machine name (uname -m) */
276 { "nodiagcode", &fmd_conf_string
, "-" }, /* diagcode to use if error */
277 { "repaircode", &fmd_conf_string
, "-" }, /* diagcode for list.repaired */
278 { "resolvecode", &fmd_conf_string
, "-" }, /* diagcode for list.resolved */
279 { "updatecode", &fmd_conf_string
, "-" }, /* diagcode for list.updated */
280 { "osrelease", &fmd_conf_string
, _fmd_uts
.release
}, /* release (uname -r) */
281 { "osversion", &fmd_conf_string
, _fmd_uts
.version
}, /* version (uname -v) */
282 { "platform", &fmd_conf_string
, _fmd_plat
}, /* platform string (uname -i) */
283 { "plugin.close", &fmd_conf_bool
, "true" }, /* dlclose plugins on fini */
284 { "plugin.path", &fmd_conf_path
, _fmd_plugin_path
}, /* path for plugin mods */
285 { "product", &fmd_conf_string
, _fmd_prod
}, /* product name string */
286 { "rootdir", &fmd_conf_string
, "" }, /* root directory for paths */
287 { "rpc.adm.path", &fmd_conf_string
, NULL
}, /* FMD_ADM rendezvous file */
288 { "rpc.adm.prog", &fmd_conf_uint32
, "100169" }, /* FMD_ADM rpc program num */
289 { "rpc.api.path", &fmd_conf_string
, NULL
}, /* FMD_API rendezvous file */
290 { "rpc.api.prog", &fmd_conf_uint32
, "100170" }, /* FMD_API rpc program num */
291 { "rpc.rcvsize", &fmd_conf_size
, "128k" }, /* rpc receive buffer size */
292 { "rpc.sndsize", &fmd_conf_size
, "128k" }, /* rpc send buffer size */
293 { "rsrc.pollperiod", &fmd_conf_time
, "1h" }, /* aged rsrcs poller period */
294 { "rsrc.age", &fmd_conf_time
, "30d" }, /* max age of old rsrc log */
295 { "rsrc.zero", &fmd_conf_bool
, "false" }, /* zero rsrc cache on start? */
296 { "schemedir", &fmd_conf_string
, _fmd_scheme_path
}, /* path for scheme mods */
297 { "self.name", &fmd_conf_string
, "fmd-self-diagnosis" }, /* self-diag module */
298 { "self.dict", &fmd_conf_list
, "FMD.dict" }, /* self-diag dictionary list */
299 { "server", &fmd_conf_string
, _fmd_uts
.nodename
}, /* server id for de auth */
300 { "strbuckets", &fmd_conf_uint32
, "211" }, /* size of string hashes */
302 { "trace.mode", &fmd_trmode_ops
, "full" }, /* trace mode: none/lite/full */
304 { "trace.mode", &fmd_trmode_ops
, "lite" }, /* trace mode: none/lite/full */
306 { "trace.recs", &fmd_conf_uint32
, "128" }, /* trace records per thread */
307 { "trace.frames", &fmd_conf_uint32
, "16" }, /* max trace rec stack frames */
308 { "uuidlen", &fmd_conf_uint32
, "36" }, /* UUID ASCII string length */
309 { "xprt.ttl", &fmd_conf_uint8
, "1" }, /* default event time-to-live */
313 * Statistics maintained by fmd itself on behalf of various global subsystems.
314 * NOTE: FMD_TYPE_STRING statistics should not be used here. If they are
315 * required in the future, the FMD_ADM_MODGSTAT service routine must change.
317 static fmd_statistics_t _fmd_stats
= {
318 { "errlog.replayed", FMD_TYPE_UINT64
, "total events replayed from errlog" },
319 { "errlog.partials", FMD_TYPE_UINT64
, "events partially committed in errlog" },
320 { "errlog.enospc", FMD_TYPE_UINT64
, "events not appended to errlog (ENOSPC)" },
321 { "fltlog.enospc", FMD_TYPE_UINT64
, "events not appended to fltlog (ENOSPC)" },
322 { "log.enospc", FMD_TYPE_UINT64
, "events not appended to other logs (ENOSPC)" },
323 { "dr.gen", FMD_TYPE_UINT64
, "dynamic reconfiguration generation" },
324 { "topo.gen", FMD_TYPE_UINT64
, "topology snapshot generation" },
325 { "topo.drgen", FMD_TYPE_UINT64
, "current topology DR generation number" },
/*
 * SMBIOS serial numbers can contain characters (particularly ':' and ' ')
 * that are invalid for the authority and can break FMRI parsing.  We
 * translate any invalid character to a safe '-', and trim leading and
 * trailing whitespace.  Similarly, '/' can be found in some product names,
 * so it is translated to '-' as well.
 *
 * 'begin' is the NUL-terminated input string.  'buf' receives the cleaned
 * copy and must have room for MAXNAMELEN bytes; at most MAXNAMELEN - 1
 * characters are copied and the result is always NUL-terminated.  If the
 * input is empty or consists only of whitespace, 'buf' is left untouched.
 *
 * Every byte is converted to unsigned char before it is handed to the
 * <ctype.h> classifiers: passing a negative plain char (any byte >= 0x80
 * where char is signed) to isspace()/isprint() is undefined behaviour.
 */
static void
fmd_cleanup_auth_str(char *buf, const char *begin)
{
	const char *end, *cp;
	unsigned char c;
	int i;

	end = begin + strlen(begin);

	/* Trim whitespace from both ends of the input. */
	while (begin < end && isspace((unsigned char)*begin))
		begin++;
	while (begin < end && isspace((unsigned char)*(end - 1)))
		end--;

	if (begin >= end)
		return;

	cp = begin;
	for (i = 0; i < MAXNAMELEN - 1; i++) {
		if (cp >= end)
			break;
		c = (unsigned char)*cp;
		if (c == ':' || c == '=' || c == '/' || isspace(c) ||
		    !isprint(c)) {
			buf[i] = '-';
		} else {
			buf[i] = (char)c;
		}
		cp++;
	}
	buf[i] = '\0';
}
368 fmd_create(fmd_t
*dp
, const char *arg0
, const char *root
, const char *conf
)
370 fmd_conf_path_t
*pap
;
372 const char *name
, *psn
, *csn
;
381 di_prom_handle_t promh
= DI_PROM_HANDLE_NIL
;
382 di_node_t rooth
= DI_NODE_NIL
;
385 (void) sysinfo(SI_PLATFORM
, _fmd_plat
, sizeof (_fmd_plat
));
386 (void) sysinfo(SI_ARCHITECTURE
, _fmd_isa
, sizeof (_fmd_isa
));
387 (void) uname(&_fmd_uts
);
389 if ((shp
= smbios_open(NULL
, SMB_VERSION
, 0, NULL
)) != NULL
) {
390 if ((id
= smbios_info_system(shp
, &s1
)) != SMB_ERR
&&
391 smbios_info_common(shp
, id
, &s2
) != SMB_ERR
)
392 fmd_cleanup_auth_str(_fmd_prod
, s2
.smbi_product
);
394 if ((psn
= smbios_psn(shp
)) != NULL
)
395 fmd_cleanup_auth_str(_fmd_psn
, psn
);
397 if ((csn
= smbios_csn(shp
)) != NULL
)
398 fmd_cleanup_auth_str(_fmd_csn
, csn
);
401 } else if ((rooth
= di_init("/", DINFOPROP
)) != DI_NODE_NIL
&&
402 (promh
= di_prom_init()) != DI_PROM_HANDLE_NIL
) {
403 if (di_prom_prop_lookup_bytes(promh
, rooth
, "chassis-sn",
404 (unsigned char **)&bufp
) != -1) {
405 fmd_cleanup_auth_str(_fmd_csn
, bufp
);
409 if (promh
!= DI_PROM_HANDLE_NIL
)
411 if (rooth
!= DI_NODE_NIL
)
414 bzero(dp
, sizeof (fmd_t
));
416 dp
->d_version
= _fmd_version
;
417 dp
->d_pname
= fmd_strbasename(arg0
);
418 dp
->d_pid
= getpid();
420 if (pthread_key_create(&dp
->d_key
, NULL
) != 0)
421 fmd_error(EFMD_EXIT
, "failed to create pthread key");
423 (void) pthread_mutex_init(&dp
->d_xprt_lock
, NULL
);
424 (void) pthread_mutex_init(&dp
->d_err_lock
, NULL
);
425 (void) pthread_mutex_init(&dp
->d_thr_lock
, NULL
);
426 (void) pthread_mutex_init(&dp
->d_mod_lock
, NULL
);
427 (void) pthread_mutex_init(&dp
->d_stats_lock
, NULL
);
428 (void) pthread_mutex_init(&dp
->d_topo_lock
, NULL
);
429 (void) pthread_rwlock_init(&dp
->d_log_lock
, NULL
);
430 (void) pthread_rwlock_init(&dp
->d_hvilog_lock
, NULL
);
431 (void) pthread_rwlock_init(&dp
->d_ilog_lock
, NULL
);
432 (void) pthread_mutex_init(&dp
->d_fmd_lock
, NULL
);
433 (void) pthread_cond_init(&dp
->d_fmd_cv
, NULL
);
436 * A small number of properties must be set manually before we open
437 * the root configuration file. These include any settings for our
438 * memory allocator and path expansion token values, because these
439 * values are needed by the routines in fmd_conf.c itself. After
440 * the root configuration file is processed, we reset these properties
441 * based upon the latest values from the configuration file.
443 dp
->d_alloc_msecs
= 10;
444 dp
->d_alloc_tries
= 3;
445 dp
->d_str_buckets
= 211;
447 dp
->d_rootdir
= root
? root
: "";
448 dp
->d_platform
= _fmd_plat
;
449 dp
->d_machine
= _fmd_uts
.machine
;
450 dp
->d_isaname
= _fmd_isa
;
452 dp
->d_conf
= fmd_conf_open(conf
, sizeof (_fmd_conf
) /
453 sizeof (_fmd_conf
[0]), _fmd_conf
, FMD_CONF_DEFER
);
455 if (dp
->d_conf
== NULL
) {
457 "failed to load required configuration properties\n");
460 (void) fmd_conf_getprop(dp
->d_conf
, "alloc.msecs", &dp
->d_alloc_msecs
);
461 (void) fmd_conf_getprop(dp
->d_conf
, "alloc.tries", &dp
->d_alloc_tries
);
462 (void) fmd_conf_getprop(dp
->d_conf
, "strbuckets", &dp
->d_str_buckets
);
464 (void) fmd_conf_getprop(dp
->d_conf
, "platform", &dp
->d_platform
);
465 (void) fmd_conf_getprop(dp
->d_conf
, "machine", &dp
->d_machine
);
466 (void) fmd_conf_getprop(dp
->d_conf
, "isaname", &dp
->d_isaname
);
469 * Manually specified rootdirs override config files, so only update
470 * d_rootdir based on the config files we parsed if no 'root' was set.
473 (void) fmd_conf_getprop(dp
->d_conf
, "rootdir", &dp
->d_rootdir
);
475 (void) fmd_conf_setprop(dp
->d_conf
, "rootdir", dp
->d_rootdir
);
478 * Once the base conf file properties are loaded, lookup the values
479 * of $conf_path and $conf_file and merge in any other conf files.
481 (void) fmd_conf_getprop(dp
->d_conf
, "conf_path", &pap
);
482 (void) fmd_conf_getprop(dp
->d_conf
, "conf_file", &name
);
484 for (i
= 0; i
< pap
->cpa_argc
; i
++) {
485 (void) snprintf(file
, sizeof (file
),
486 "%s/%s", pap
->cpa_argv
[i
], name
);
487 if (access(file
, F_OK
) == 0)
488 fmd_conf_merge(dp
->d_conf
, file
);
492 * Update the value of fmd.d_fg based on "fg". We cache this property
493 * because it must be accessed deep within fmd at fmd_verror() time.
494 * Update any other properties that must be cached for performance.
496 (void) fmd_conf_getprop(fmd
.d_conf
, "fg", &fmd
.d_fg
);
497 (void) fmd_conf_getprop(fmd
.d_conf
, "xprt.ttl", &fmd
.d_xprt_ttl
);
500 * Initialize our custom libnvpair allocator and create an nvlist for
501 * authority elements corresponding to this instance of the daemon.
503 (void) nv_alloc_init(&dp
->d_nva
, &fmd_nv_alloc_ops
);
504 dp
->d_auth
= fmd_protocol_authority();
507 * The fmd_module_t for the root module must be created manually. Most
508 * of it remains unused and zero, except for the few things we fill in.
510 dp
->d_rmod
= fmd_zalloc(sizeof (fmd_module_t
), FMD_SLEEP
);
511 dp
->d_rmod
->mod_name
= fmd_strdup(dp
->d_pname
, FMD_SLEEP
);
512 dp
->d_rmod
->mod_fmri
= fmd_protocol_fmri_module(dp
->d_rmod
);
514 fmd_list_append(&dp
->d_mod_list
, dp
->d_rmod
);
515 fmd_module_hold(dp
->d_rmod
);
517 (void) pthread_mutex_init(&dp
->d_rmod
->mod_lock
, NULL
);
518 (void) pthread_cond_init(&dp
->d_rmod
->mod_cv
, NULL
);
519 (void) pthread_mutex_init(&dp
->d_rmod
->mod_stats_lock
, NULL
);
521 dp
->d_rmod
->mod_thread
= fmd_thread_xcreate(dp
->d_rmod
, pthread_self());
522 dp
->d_rmod
->mod_stats
= fmd_zalloc(sizeof (fmd_modstat_t
), FMD_SLEEP
);
523 dp
->d_rmod
->mod_ustat
= fmd_ustat_create();
525 if (pthread_setspecific(dp
->d_key
, dp
->d_rmod
->mod_thread
) != 0)
526 fmd_error(EFMD_EXIT
, "failed to attach main thread key");
528 if ((dp
->d_stats
= (fmd_statistics_t
*)fmd_ustat_insert(
529 dp
->d_rmod
->mod_ustat
, FMD_USTAT_NOALLOC
, sizeof (_fmd_stats
) /
530 sizeof (fmd_stat_t
), (fmd_stat_t
*)&_fmd_stats
, NULL
)) == NULL
)
531 fmd_error(EFMD_EXIT
, "failed to initialize statistics");
533 (void) pthread_mutex_lock(&dp
->d_rmod
->mod_lock
);
534 dp
->d_rmod
->mod_flags
|= FMD_MOD_INIT
;
535 (void) pthread_mutex_unlock(&dp
->d_rmod
->mod_lock
);
538 * In addition to inserting the _fmd_stats collection of program-wide
539 * statistics, we also insert a statistic named after each of our
540 * errors and update these counts in fmd_verror() (see fmd_subr.c).
542 dp
->d_errstats
= sp
= fmd_zalloc(sizeof (fmd_stat_t
) *
543 (EFMD_END
- EFMD_UNKNOWN
), FMD_SLEEP
);
545 for (i
= 0; i
< EFMD_END
- EFMD_UNKNOWN
; i
++, sp
++) {
546 (void) snprintf(sp
->fmds_name
, sizeof (sp
->fmds_name
), "err.%s",
547 strrchr(fmd_errclass(EFMD_UNKNOWN
+ i
), '.') + 1);
548 sp
->fmds_type
= FMD_TYPE_UINT64
;
551 (void) fmd_ustat_insert(dp
->d_rmod
->mod_ustat
, FMD_USTAT_NOALLOC
,
552 EFMD_END
- EFMD_UNKNOWN
, dp
->d_errstats
, NULL
);
556 fmd_destroy(fmd_t
*dp
)
562 (void) fmd_conf_getprop(fmd
.d_conf
, "core", &core
);
566 if (dp
->d_xprt_ids
!= NULL
)
567 fmd_xprt_suspend_all();
570 * Unload the self-diagnosis module first. This ensures that it does
571 * not get confused as we start unloading other modules, etc. We must
572 * hold the dispq lock as a writer while doing so since it uses d_self.
574 if (dp
->d_self
!= NULL
) {
577 (void) pthread_rwlock_wrlock(&dp
->d_disp
->dq_lock
);
580 (void) pthread_rwlock_unlock(&dp
->d_disp
->dq_lock
);
582 fmd_module_unload(self
);
583 fmd_module_rele(self
);
587 * Unload modules in reverse order *except* for the root module, which
588 * is first in the list. This allows it to keep its thread and trace.
590 for (mp
= fmd_list_prev(&dp
->d_mod_list
); mp
!= dp
->d_rmod
; ) {
591 fmd_module_unload(mp
);
592 mp
= fmd_list_prev(mp
);
595 if (dp
->d_mod_hash
!= NULL
) {
596 fmd_modhash_destroy(dp
->d_mod_hash
);
597 dp
->d_mod_hash
= NULL
;
601 * Close both log files now that modules are no longer active. We must
602 * set these pointers to NULL in case any subsequent errors occur.
604 if (dp
->d_errlog
!= NULL
) {
605 fmd_log_rele(dp
->d_errlog
);
609 if (dp
->d_fltlog
!= NULL
) {
610 fmd_log_rele(dp
->d_fltlog
);
615 * Now destroy the resource cache: each ASRU contains a case reference,
616 * which may in turn contain a pointer to a referenced owning module.
618 if (dp
->d_asrus
!= NULL
) {
619 fmd_asru_hash_destroy(dp
->d_asrus
);
624 * Now that all data structures that refer to modules are torn down,
625 * no modules should be remaining on the module list except for d_rmod.
626 * If we trip one of these assertions, we're missing a rele somewhere.
628 ASSERT(fmd_list_prev(&dp
->d_mod_list
) == dp
->d_rmod
);
629 ASSERT(fmd_list_next(&dp
->d_mod_list
) == dp
->d_rmod
);
632 * Now destroy the root module. We clear its thread key first so any
633 * calls to fmd_trace() inside of the module code will be ignored.
635 (void) pthread_setspecific(dp
->d_key
, NULL
);
636 fmd_module_lock(dp
->d_rmod
);
638 while ((cp
= fmd_list_next(&dp
->d_rmod
->mod_cases
)) != NULL
)
639 fmd_case_discard(cp
, B_FALSE
);
641 fmd_module_unlock(dp
->d_rmod
);
642 fmd_free(dp
->d_rmod
->mod_stats
, sizeof (fmd_modstat_t
));
643 dp
->d_rmod
->mod_stats
= NULL
;
645 (void) pthread_mutex_lock(&dp
->d_rmod
->mod_lock
);
646 dp
->d_rmod
->mod_flags
|= FMD_MOD_FINI
;
647 (void) pthread_mutex_unlock(&dp
->d_rmod
->mod_lock
);
649 fmd_module_rele(dp
->d_rmod
);
650 ASSERT(fmd_list_next(&dp
->d_mod_list
) == NULL
);
653 * Now destroy the remaining global data structures. If 'core' was
654 * set to true, force a core dump so we can check for memory leaks.
656 if (dp
->d_cases
!= NULL
)
657 fmd_case_hash_destroy(dp
->d_cases
);
658 if (dp
->d_disp
!= NULL
)
659 fmd_dispq_destroy(dp
->d_disp
);
660 if (dp
->d_timers
!= NULL
)
661 fmd_timerq_destroy(dp
->d_timers
);
662 if (dp
->d_schemes
!= NULL
)
663 fmd_scheme_hash_destroy(dp
->d_schemes
);
664 if (dp
->d_xprt_ids
!= NULL
)
665 fmd_idspace_destroy(dp
->d_xprt_ids
);
667 if (dp
->d_errstats
!= NULL
) {
668 fmd_free(dp
->d_errstats
,
669 sizeof (fmd_stat_t
) * (EFMD_END
- EFMD_UNKNOWN
));
672 if (dp
->d_conf
!= NULL
)
673 fmd_conf_close(dp
->d_conf
);
677 nvlist_free(dp
->d_auth
);
678 (void) nv_alloc_fini(&dp
->d_nva
);
679 dp
->d_clockops
->fto_fini(dp
->d_clockptr
);
681 (void) pthread_key_delete(dp
->d_key
);
682 bzero(dp
, sizeof (fmd_t
));
685 fmd_panic("forcing core dump at user request\n");
690 fmd_gc(fmd_t
*dp
, id_t id
, hrtime_t hrt
)
695 TRACE((FMD_DBG_MOD
, "garbage collect start"));
696 fmd_modhash_apply(dp
->d_mod_hash
, fmd_module_gc
);
697 TRACE((FMD_DBG_MOD
, "garbage collect end"));
699 (void) pthread_rwlock_rdlock(&dp
->d_log_lock
);
700 fmd_log_update(dp
->d_errlog
);
701 (void) pthread_rwlock_unlock(&dp
->d_log_lock
);
703 (void) pthread_rwlock_rdlock(&dp
->d_hvilog_lock
);
704 fmd_log_update(dp
->d_hvilog
);
705 (void) pthread_rwlock_unlock(&dp
->d_hvilog_lock
);
707 (void) pthread_rwlock_rdlock(&dp
->d_ilog_lock
);
708 fmd_log_update(dp
->d_ilog
);
709 (void) pthread_rwlock_unlock(&dp
->d_ilog_lock
);
712 (void) fmd_conf_getprop(dp
->d_conf
, "gc_interval", &delta
);
713 (void) fmd_timerq_install(dp
->d_timers
, dp
->d_rmod
->mod_timerids
,
714 (fmd_timer_f
*)fmd_gc
, dp
, NULL
, delta
);
719 fmd_clear_aged_rsrcs(fmd_t
*dp
, id_t id
, hrtime_t hrt
)
723 fmd_asru_clear_aged_rsrcs();
724 (void) fmd_conf_getprop(dp
->d_conf
, "rsrc.pollperiod", &period
);
725 (void) fmd_timerq_install(dp
->d_timers
, dp
->d_rmod
->mod_timerids
,
726 (fmd_timer_f
*)fmd_clear_aged_rsrcs
, dp
, NULL
, period
);
730 * Events are committed to the errlog after cases are checkpointed. If fmd
731 * crashes before an event is ever associated with a module, this function will
732 * be called to replay it to all subscribers. If fmd crashes in between the
733 * subscriber checkpointing and committing the event in the error log, the
734 * module will have seen the event and we don't want to replay it. So we look
735 * for the event in all modules and transition it to the proper state. If
736 * it is found, we commit it to the error log and do not replay it. The in-
737 * memory case search used by fmd_module_contains() et al isn't particularly
738 * efficient, but it is faster than doing read i/o's on every case event to
739 * check their status or write i/o's on every event to replay to update states.
740 * We can improve the efficiency of this lookup algorithm later if necessary.
744 fmd_err_replay(fmd_log_t
*lp
, fmd_event_t
*ep
, fmd_t
*dp
)
749 (void) pthread_mutex_lock(&dp
->d_mod_lock
);
751 for (mp
= fmd_list_next(&dp
->d_mod_list
);
752 mp
!= NULL
; mp
= fmd_list_next(mp
)) {
753 if (fmd_module_contains(mp
, ep
)) {
759 (void) pthread_mutex_unlock(&dp
->d_mod_lock
);
762 fmd_event_commit(ep
);
764 sp
= &dp
->d_stats
->ds_log_partials
;
766 fmd_dispq_dispatch(dp
->d_disp
, ep
, FMD_EVENT_DATA(ep
));
767 sp
= &dp
->d_stats
->ds_log_replayed
;
770 (void) pthread_mutex_lock(&dp
->d_stats_lock
);
771 sp
->fmds_value
.ui64
++;
772 (void) pthread_mutex_unlock(&dp
->d_stats_lock
);
776 fmd_door_server(void *dip
)
778 fmd_dprintf(FMD_DBG_XPRT
, "door server starting for %p\n", dip
);
779 (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE
, NULL
);
780 (void) door_return(NULL
, 0, NULL
, 0);
784 * Custom door server create callback. Any fmd services that use doors will
785 * require those threads to have their fmd-specific TSD initialized, etc.
786 * Modules should use door_xcreate and derivatives such as
787 * sysevent_evc_xsubscribe in order to use private doors that
788 * avoid this global door server function (see fmd_api_module comments).
791 fmd_door(door_info_t
*dip
)
793 if (fmd_thread_create(fmd
.d_rmod
, fmd_door_server
, dip
) == NULL
)
794 fmd_panic("failed to create server for door %p", (void *)dip
);
798 * This signal handler is installed for the client.thrsig signal to be used to
799 * force an auxiliary thread to wake up from a system call and return EINTR in
800 * response to a module's use of fmd_thr_signal(). We also trace the event.
805 TRACE((FMD_DBG_MOD
, "module thread received sig #%d", sig
));
809 fmd_run(fmd_t
*dp
, int pfd
)
811 char *nodc_key
[] = { FMD_FLT_NODC
, NULL
};
812 char *repair_key
[] = { FM_LIST_REPAIRED_CLASS
, NULL
};
813 char *resolve_key
[] = { FM_LIST_RESOLVED_CLASS
, NULL
};
814 char *update_key
[] = { FM_LIST_UPDATED_CLASS
, NULL
};
816 struct sigaction act
;
818 int status
= FMD_EXIT_SUCCESS
;
820 fmd_conf_path_t
*pap
;
825 * Cache all the current debug property settings in d_fmd_debug,
826 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout. If a given debug mask
827 * is non-zero and the corresponding dbout mask is zero, set dbout
828 * to a sensible default value based on whether we have daemonized.
830 (void) fmd_conf_getprop(dp
->d_conf
, "dbout", &dbout
);
832 if (dp
->d_fmd_debug
!= 0 && dbout
== 0)
833 dp
->d_fmd_dbout
= dp
->d_fg
? FMD_DBOUT_STDERR
: FMD_DBOUT_SYSLOG
;
835 dp
->d_fmd_dbout
= dbout
;
837 (void) fmd_conf_getprop(dp
->d_conf
, "client.debug", &dp
->d_hdl_debug
);
838 (void) fmd_conf_getprop(dp
->d_conf
, "client.dbout", &dbout
);
840 if (dp
->d_hdl_debug
!= 0 && dbout
== 0)
841 dp
->d_hdl_dbout
= dp
->d_fg
? FMD_DBOUT_STDERR
: FMD_DBOUT_SYSLOG
;
843 dp
->d_hdl_dbout
= dbout
;
846 * Initialize remaining major program data structures such as the
847 * clock, dispatch queues, log files, module hash collections, etc.
848 * This work is done here rather than in fmd_create() to permit the -o
849 * command-line option to modify properties after fmd_create() is done.
851 name
= dp
->d_rootdir
!= NULL
&&
852 *dp
->d_rootdir
!= '\0' ? dp
->d_rootdir
: NULL
;
855 * The clock must be initialized before fmd_topo_init() because
856 * fmd_topo_update() calls fmd_time_gethrtime().
858 dp
->d_clockptr
= dp
->d_clockops
->fto_init();
862 dp
->d_xprt_ids
= fmd_idspace_create("xprt_ids", 1, INT_MAX
);
863 fmd_xprt_suspend_all();
865 (void) door_server_create(fmd_door
);
867 dp
->d_rmod
->mod_timerids
= fmd_idspace_create(dp
->d_pname
, 1, 16);
868 dp
->d_timers
= fmd_timerq_create();
869 dp
->d_disp
= fmd_dispq_create();
870 dp
->d_cases
= fmd_case_hash_create();
873 * The root module's mod_queue is created with limit zero, making it
874 * act like /dev/null; anything inserted here is simply ignored.
876 dp
->d_rmod
->mod_queue
= fmd_eventq_create(dp
->d_rmod
,
877 &dp
->d_rmod
->mod_stats
->ms_evqstat
, &dp
->d_rmod
->mod_stats_lock
, 0);
880 * Once our subsystems that use signals have been set up, install the
881 * signal handler for the fmd_thr_signal() API. Verify that the signal
882 * being used for this purpose doesn't conflict with something else.
884 (void) fmd_conf_getprop(dp
->d_conf
, "client.thrsig", &dp
->d_thr_sig
);
886 if (sigaction(dp
->d_thr_sig
, NULL
, &act
) != 0) {
887 fmd_error(EFMD_EXIT
, "invalid signal selected for "
888 "client.thrsig property: %d\n", dp
->d_thr_sig
);
891 if (act
.sa_handler
!= SIG_IGN
&& act
.sa_handler
!= SIG_DFL
) {
892 fmd_error(EFMD_EXIT
, "signal selected for client.thrsig "
893 "property is already in use: %d\n", dp
->d_thr_sig
);
896 act
.sa_handler
= fmd_signal
;
899 (void) sigemptyset(&act
.sa_mask
);
900 (void) sigaction(dp
->d_thr_sig
, &act
, NULL
);
902 (void) fmd_conf_getprop(dp
->d_conf
, "schemedir", &name
);
903 dp
->d_schemes
= fmd_scheme_hash_create(dp
->d_rootdir
, name
);
905 (void) fmd_conf_getprop(dp
->d_conf
, "log.rsrc", &name
);
906 dp
->d_asrus
= fmd_asru_hash_create(dp
->d_rootdir
, name
);
908 (void) fmd_conf_getprop(dp
->d_conf
, "log.error", &name
);
909 dp
->d_errlog
= fmd_log_open(dp
->d_rootdir
, name
, FMD_LOG_ERROR
);
911 (void) fmd_conf_getprop(dp
->d_conf
, "log.fault", &name
);
912 dp
->d_fltlog
= fmd_log_open(dp
->d_rootdir
, name
, FMD_LOG_FAULT
);
914 (void) fmd_conf_getprop(dp
->d_conf
, "log.info_hival", &name
);
915 dp
->d_hvilog
= fmd_log_open(dp
->d_rootdir
, name
, FMD_LOG_INFO
);
917 (void) fmd_conf_getprop(dp
->d_conf
, "log.info", &name
);
918 dp
->d_ilog
= fmd_log_open(dp
->d_rootdir
, name
, FMD_LOG_INFO
);
920 if (dp
->d_asrus
== NULL
|| dp
->d_errlog
== NULL
|| dp
->d_fltlog
== NULL
)
921 fmd_error(EFMD_EXIT
, "failed to initialize log files\n");
924 * Before loading modules, create an empty control event which will act
925 * as a global barrier for module event processing. Each module we
926 load successfully will insert it at the head of its event queue,
927 * and then pause inside of fmd_ctl_rele() after dequeuing the event.
928 * This module barrier is required for two reasons:
930 * (a) During module loading, the restoration of case checkpoints may
931 * result in a list.* event being recreated for which the intended
932 * subscriber has not yet loaded depending on the load order. Such
933 * events could then result in spurious "no subscriber" errors.
935 * (b) During errlog replay, a sequence of errors from a long time ago
936 * may be replayed, and the module may attempt to install relative
937 * timers associated with one or more of these events. If errlog
938 * replay were "racing" with active module threads, an event E1
939 * that resulted in a relative timer T at time E1 + N nsec could
940 * fire prior to an event E2 being enqueued, even if the relative
941 * time ordering was E1 < E2 < E1 + N, causing mis-diagnosis.
943 dp
->d_mod_event
= e
= fmd_event_create(FMD_EVT_CTL
,
944 FMD_HRT_NOW
, NULL
, fmd_ctl_init(NULL
));
949 * Once all data structures are initialized, we load all of our modules
950 * in order according to class in order to load up any subscriptions.
951 * Once built-in modules are loaded, we detach from our waiting parent.
953 dp
->d_mod_hash
= fmd_modhash_create();
955 if (fmd_builtin_loadall(dp
->d_mod_hash
) != 0 && !dp
->d_fg
)
956 fmd_error(EFMD_EXIT
, "failed to initialize fault manager\n");
958 (void) fmd_conf_getprop(dp
->d_conf
, "self.name", &name
);
959 dp
->d_self
= fmd_modhash_lookup(dp
->d_mod_hash
, name
);
961 if (dp
->d_self
!= NULL
) {
962 if (fmd_module_dc_key2code(dp
->d_self
, nodc_key
, code_str
,
963 sizeof (code_str
)) == 0)
964 (void) fmd_conf_setprop(dp
->d_conf
, "nodiagcode",
966 if (fmd_module_dc_key2code(dp
->d_self
, repair_key
, code_str
,
967 sizeof (code_str
)) == 0)
968 (void) fmd_conf_setprop(dp
->d_conf
, "repaircode",
970 if (fmd_module_dc_key2code(dp
->d_self
, resolve_key
, code_str
,
971 sizeof (code_str
)) == 0)
972 (void) fmd_conf_setprop(dp
->d_conf
, "resolvecode",
974 if (fmd_module_dc_key2code(dp
->d_self
, update_key
, code_str
,
975 sizeof (code_str
)) == 0)
976 (void) fmd_conf_setprop(dp
->d_conf
, "updatecode",
981 dp
->d_running
= 1; /* we are now officially an active fmd */
984 * Now that we're running, if a pipe fd was specified, write an exit
985 * status to it to indicate that our parent process can safely detach.
986 * Then proceed to loading the remaining non-built-in modules.
989 (void) write(pfd
, &status
, sizeof (status
));
992 * Before loading all modules, repopulate the ASRU cache from its
993 * persistent repository on disk. Then during module loading, the
994 * restoration of checkpoint files will reparent any active cases.
996 fmd_asru_hash_refresh(dp
->d_asrus
);
998 (void) fmd_conf_getprop(dp
->d_conf
, "plugin.path", &pap
);
999 fmd_modhash_loadall(dp
->d_mod_hash
, pap
, &fmd_rtld_ops
, ".so");
1001 (void) fmd_conf_getprop(dp
->d_conf
, "agent.path", &pap
);
1002 fmd_modhash_loadall(dp
->d_mod_hash
, pap
, &fmd_proc_ops
, NULL
);
1004 dp
->d_loaded
= 1; /* modules are now loaded */
1007 * With all modules loaded, replay fault events from the ASRU cache for
1008 * any ASRUs that must be retired, replay error events from the errlog
1009 that did not finish processing the last time fmd ran, and then release
1010 * the global module barrier by executing a final rele on d_mod_event.
1012 fmd_asru_hash_replay(dp
->d_asrus
);
1014 (void) pthread_rwlock_rdlock(&dp
->d_log_lock
);
1015 fmd_log_replay(dp
->d_errlog
, (fmd_log_f
*)fmd_err_replay
, dp
);
1016 fmd_log_update(dp
->d_errlog
);
1017 (void) pthread_rwlock_unlock(&dp
->d_log_lock
);
1019 dp
->d_mod_event
= NULL
;
1023 * Now replay list.updated and list.repaired events
1025 fmd_case_repair_replay();
1028 * Finally, awaken any threads associated with receiving events from
1029 * open transports and tell them to proceed with fmd_xprt_recv().
1031 fmd_xprt_resume_all();
1033 fmd_clear_aged_rsrcs(dp
, 0, 0);
1035 (void) pthread_mutex_lock(&dp
->d_fmd_lock
);
1037 (void) pthread_cond_broadcast(&dp
->d_fmd_cv
);
1038 (void) pthread_mutex_unlock(&dp
->d_fmd_lock
);
1044 const fmd_conf_mode_t
*cmp
;
1046 (void) printf("Usage: %s -o debug=mode[,mode]\n", dp
->d_pname
);
1048 for (cmp
= _fmd_debug_modes
; cmp
->cm_name
!= NULL
; cmp
++)
1049 (void) printf("\t%s\t%s\n", cmp
->cm_name
, cmp
->cm_desc
);