4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/fm/protocol.h>
30 #include <fmd_string.h>
31 #include <fmd_protocol.h>
32 #include <fmd_module.h>
33 #include <fmd_error.h>
/*
 * Initializer entries for the module's two statistics, which the rest of
 * this file updates as self_stats.nosub and self_stats.module.  Each entry
 * lists a name string, the type FMD_TYPE_UINT64, and a description string.
 * NOTE(review): the enclosing declaration of self_stats is not visible in
 * this rendering of the file.
 */
39 { "nosub", FMD_TYPE_UINT64
, "event classes with no subscribers seen" },
40 { "module", FMD_TYPE_UINT64
, "error events received from fmd modules" },
/*
 * Per-case record attached to every case this module opens.  sc_kind
 * distinguishes the two kinds of case used in this file: SC_CLASS (created
 * with an event class string) and SC_MODULE (created with a module name).
 * NOTE(review): other code in this file reads and frees an sc_name string
 * member; its declaration and the end of this typedef are not visible in
 * this rendering.
 */
43 typedef struct self_case
{
44 enum { SC_CLASS
, SC_MODULE
} sc_kind
;
/*
 * Build a new self_case_t for a case of the given kind and name.  The
 * record is allocated from the module handle with FMD_SLEEP, and sc_name
 * receives a handle-allocated copy of 'name' (so the caller's string need
 * not outlive the call).  Callers in this file pass the result directly to
 * fmd_case_open().
 * NOTE(review): the lines that store 'kind' and return the record are not
 * visible in this rendering -- presumably sc_kind is set from 'kind' and
 * scp is returned; confirm against the full source.
 */
49 self_case_create(fmd_hdl_t
*hdl
, int kind
, const char *name
)
51 self_case_t
*scp
= fmd_hdl_alloc(hdl
, sizeof (self_case_t
), FMD_SLEEP
);
54 scp
->sc_name
= fmd_hdl_strdup(hdl
, name
, FMD_SLEEP
);
/*
 * Release a self_case_t: free the sc_name string with fmd_hdl_strfree()
 * and then the record itself with fmd_hdl_free().  The name is freed first
 * because it is only reachable through scp.
 */
60 self_case_destroy(fmd_hdl_t
*hdl
, self_case_t
*scp
)
62 fmd_hdl_strfree(hdl
, scp
->sc_name
);
63 fmd_hdl_free(hdl
, scp
, sizeof (self_case_t
));
/*
 * Search this module's cases for one whose self_case_t has the given kind
 * and a name equal to 'name' (compared with strcmp).  The walk starts by
 * passing NULL to fmd_case_next() and continues until it returns NULL.
 * Callers in this file treat a NULL result as "no such case".
 * NOTE(review): the statement executed on a match and the final return are
 * not visible in this rendering -- presumably the loop stops at the match
 * and cp is returned; confirm against the full source.
 */
67 self_case_lookup(fmd_hdl_t
*hdl
, int kind
, const char *name
)
69 fmd_case_t
*cp
= NULL
;
71 while ((cp
= fmd_case_next(hdl
, cp
)) != NULL
) {
72 self_case_t
*scp
= fmd_case_getspecific(hdl
, cp
);
73 if (scp
->sc_kind
== kind
&& strcmp(scp
->sc_name
, name
) == 0)
/*
 * Event receive entry point (installed as the first entry of self_ops).
 *
 *   hdl   - module handle used for all case and allocation calls
 *   ep    - the received event; attached to a case via fmd_case_add_ereport
 *   nvl   - the event payload; consulted for the detector and module errno
 *   class - the event class string that drives the dispatch below
 *
 * Two outcomes produce a solved case:
 *   1. A module error event (class equal to fmd_errclass(EFMD_MODULE)) whose
 *      errno is EFMD_MOD_FAIL or EFMD_MOD_CONF yields an FMD_FLT_MOD or
 *      FMD_FLT_CONF fault against the detector, one case per module name.
 *   2. Any event that survives the class filters below yields an
 *      FMD_FLT_NOSUB fault carrying "nosub_class", one case per class.
 * Everything else is dropped by an early return.
 * NOTE(review): the local declarations (cp, flt, mod, name, err) and the
 * end of the first branch are not visible in this rendering of the file.
 */
82 self_recv(fmd_hdl_t
*hdl
, fmd_event_t
*ep
, nvlist_t
*nvl
, const char *class)
90 * If we get an error report from another fmd module, then create a
91 * case for the module and add the ereport to it. The error is either
92 * from fmd_hdl_error() or from fmd_api_error(). If it is the latter,
93 * fmd_module_error() will send another event of class EFMD_MOD_FAIL
94 * when the module has failed, at which point we can solve the case.
95 * We can also close the case on EFMD_MOD_CONF (bad config file).
97 if (strcmp(class, fmd_errclass(EFMD_MODULE
)) == 0 &&
98 nvlist_lookup_nvlist(nvl
, FM_EREPORT_DETECTOR
, &mod
) == 0 &&
99 nvlist_lookup_string(mod
, FM_FMRI_FMD_NAME
, &name
) == 0) {
/* Reuse the module's existing case if there is one, else open a new one. */
101 if ((cp
= self_case_lookup(hdl
, SC_MODULE
, name
)) == NULL
) {
102 cp
= fmd_case_open(hdl
,
103 self_case_create(hdl
, SC_MODULE
, name
));
106 fmd_case_add_ereport(hdl
, cp
, ep
);
107 self_stats
.module
.fmds_value
.ui64
++;
/*
 * The lookup result is deliberately discarded, so if the payload has no
 * FMD_ERR_MOD_ERRNO member err keeps whatever value it had before the call.
 * NOTE(review): err's declaration and initial value are not visible here.
 */
108 (void) nvlist_lookup_int32(nvl
, FMD_ERR_MOD_ERRNO
, &err
);
110 if (err
!= EFMD_MOD_FAIL
&& err
!= EFMD_MOD_CONF
)
111 return; /* module is still active, so keep case open */
113 if (fmd_case_solved(hdl
, cp
))
114 return; /* case is already closed but error in _fini */
/* 'class' is reused from here on to hold the fault class being published. */
116 class = err
== EFMD_MOD_FAIL
? FMD_FLT_MOD
: FMD_FLT_CONF
;
117 flt
= fmd_protocol_fault(class, 100, mod
, NULL
, NULL
, NULL
);
119 fmd_case_add_suspect(hdl
, cp
, flt
);
120 fmd_case_solve(hdl
, cp
);
126 * If we get an I/O DDI ereport, drop it for now until the I/O DE is
127 * implemented and integrated. Existing drivers in O/N have bugs that
128 * will trigger these and we don't want this producing FMD_FLT_NOSUB.
130 if (strncmp(class, "ereport.io.ddi.", strlen("ereport.io.ddi.")) == 0)
131 return; /* if we got a DDI ereport, drop it for now */
134 * If we get any other type of event then it is of a class for which
135 * there are no subscribers. Some of these correspond to internal fmd
136 * errors, which we ignore. Otherwise we keep one case per class and
137 * use it to produce a message indicating that something is awry.
139 if (strcmp(class, FM_LIST_SUSPECT_CLASS
) == 0 ||
140 strcmp(class, FM_LIST_ISOLATED_CLASS
) == 0 ||
141 strcmp(class, FM_LIST_UPDATED_CLASS
) == 0 ||
142 strcmp(class, FM_LIST_RESOLVED_CLASS
) == 0 ||
143 strcmp(class, FM_LIST_REPAIRED_CLASS
) == 0 ||
144 strncmp(class, FM_FAULT_CLASS
, strlen(FM_FAULT_CLASS
)) == 0 ||
145 strncmp(class, FM_DEFECT_CLASS
, strlen(FM_DEFECT_CLASS
)) == 0)
146 return; /* if no agents are present just drop list.* */
148 if (strncmp(class, FMD_ERR_CLASS
, FMD_ERR_CLASS_LEN
) == 0)
149 return; /* if fmd itself produced the error just drop it */
151 if (strncmp(class, FMD_RSRC_CLASS
, FMD_RSRC_CLASS_LEN
) == 0)
152 return; /* if fmd itself produced the event just drop it */
154 if (strncmp(class, SYSEVENT_RSRC_CLASS
, SYSEVENT_RSRC_CLASS_LEN
) == 0)
155 return; /* sysevent resources are auto generated by fmd */
157 if (self_case_lookup(hdl
, SC_CLASS
, class) != NULL
)
158 return; /* case is already open against this class */
/*
 * FM_IREPORT_CLASS is pasted to "." by string-literal concatenation, and
 * sizeof of that macro's literal counts its terminating NUL, so the length
 * compared is exactly the class prefix plus the trailing dot.
 */
160 if (strncmp(class, FM_IREPORT_CLASS
".",
161 sizeof (FM_IREPORT_CLASS
)) == 0)
162 return; /* no subscriber required for ireport.* */
/* No subscriber exists: open a per-class case and solve it immediately. */
164 cp
= fmd_case_open(hdl
, self_case_create(hdl
, SC_CLASS
, class));
165 fmd_case_add_ereport(hdl
, cp
, ep
);
166 self_stats
.nosub
.fmds_value
.ui64
++;
168 flt
= fmd_protocol_fault(FMD_FLT_NOSUB
, 100, NULL
, NULL
, NULL
, NULL
);
169 (void) nvlist_add_string(flt
, "nosub_class", class);
170 fmd_case_add_suspect(hdl
, cp
, flt
);
171 fmd_case_solve(hdl
, cp
);
/*
 * Case close entry point (installed as the fmdo_close slot of self_ops).
 * Frees the self_case_t attached to the case, obtained with
 * fmd_case_getspecific().
 */
175 self_close(fmd_hdl_t
*hdl
, fmd_case_t
*cp
)
177 self_case_destroy(hdl
, fmd_case_getspecific(hdl
, cp
));
/*
 * Entry point table handed to fmd through the fmd_hdl_info_t in self_init().
 * Only the receive and close callbacks are provided; the timeout and stats
 * slots are NULL.
 * NOTE(review): any entries after fmdo_stats and the closing brace are not
 * visible in this rendering of the file.
 */
180 static const fmd_hdl_ops_t self_ops
= {
181 self_recv
, /* fmdo_recv */
182 NULL
, /* fmdo_timeout */
183 self_close
, /* fmdo_close */
184 NULL
, /* fmdo_stats */
/*
 * Module initialization.  In order, this routine:
 *   1. reinterprets hdl as an fmd_module_t and, while holding
 *      mod_stats_lock, sets the ms_ckpt_save and ms_ckpt_restore stats to
 *      FMD_B_FALSE (the rationale is given in the comment text below);
 *   2. registers with fmd using FMD_API_VERSION and the local info record,
 *      returning early if registration fails;
 *   3. publishes self_stats with fmd_stat_create() using FMD_STAT_NOALLOC,
 *      passing the entry count computed as
 *      sizeof (self_stats) / sizeof (fmd_stat_t).
 * NOTE(review): the remainder of the info initializer is not visible in
 * this rendering of the file.
 */
189 self_init(fmd_hdl_t
*hdl
)
191 fmd_module_t
*mp
= (fmd_module_t
*)hdl
; /* see below */
193 fmd_hdl_info_t info
= {
194 "Fault Manager Self-Diagnosis", "1.0", &self_ops
, NULL
198 * Unlike other modules, fmd-self-diagnosis has some special needs that
199 * fall outside of what we want in the module API. Manually disable
200 * checkpointing for this module by tweaking the mod_stats values.
201 * The self-diagnosis world relates to fmd's running state and modules
202 * which all change when it restarts, so don't bother w/ checkpointing.
204 (void) pthread_mutex_lock(&mp
->mod_stats_lock
);
205 mp
->mod_stats
->ms_ckpt_save
.fmds_value
.b
= FMD_B_FALSE
;
206 mp
->mod_stats
->ms_ckpt_restore
.fmds_value
.b
= FMD_B_FALSE
;
207 (void) pthread_mutex_unlock(&mp
->mod_stats_lock
);
209 if (fmd_hdl_register(hdl
, FMD_API_VERSION
, &info
) != 0)
210 return; /* failed to register with fmd */
/* The return value of fmd_stat_create() is deliberately ignored. */
212 (void) fmd_stat_create(hdl
, FMD_STAT_NOALLOC
, sizeof (self_stats
) /
213 sizeof (fmd_stat_t
), (fmd_stat_t
*)&self_stats
);
/*
 * Module teardown.  Walks every case returned by fmd_case_next(), starting
 * from NULL, and frees the self_case_t attached to each one with
 * self_case_destroy().
 */
217 self_fini(fmd_hdl_t
*hdl
)
219 fmd_case_t
*cp
= NULL
;
221 while ((cp
= fmd_case_next(hdl
, cp
)) != NULL
)
222 self_case_destroy(hdl
, fmd_case_getspecific(hdl
, cp
));