4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/fm/protocol.h>
28 #include <fm/fmd_api.h>
30 #include <libdevinfo.h>
31 #include <sys/modctl.h>
/*
 * Module-wide switch for device retirement.  _fmd_init() loads it from the
 * "global-disable" property; rio_recv() tests it for FM_LIST_SUSPECT_CLASS
 * and FM_LIST_UPDATED_CLASS events and logs "retire disabled" when it is
 * set.
 */
33 static int global_disable
;
/*
 * Node of the singly linked list of fault exceptions.  el_next links to the
 * next node.  The rest of the file also uses an el_fault member (a string
 * created with fmd_hdl_strdup() and matched against faults with
 * fmd_nvl_class_match()); its declaration is not visible in this view.
 */
37 struct except_list
*el_next
;
/*
 * Head of the exception list.  Elements are created in
 * parse_exception_string(), walked in fault_exception() and released in
 * free_exception_list().
 */
40 static struct except_list
*except_list
;
/*
 * parse_exception_string: create exception list elements from estr.
 *
 *	hdl	fmd module handle, used for allocation and for aborting.
 *	estr	exception string; the visible code searches it for ':'
 *		characters, so it is presumably a ':'-separated list of
 *		fault class patterns -- confirm against the full source.
 *
 * NOTE(review): only part of this function is visible here.  The local
 * declarations, the enclosing loop and the statement that makes the new
 * element the list head are not shown, so the comments below describe only
 * the statements that are.
 */
43 parse_exception_string(fmd_hdl_t
*hdl
, char *estr
)
48 struct except_list
*elem
;
54 /* Remove leading ':' */
/* Find the next ':' at or after the current position p. */
60 next
= strchr(p
, ':');
/*
 * Allocate a list element, give it its own copy of the string at p, and
 * point its el_next at the current head of except_list.
 */
65 elem
= fmd_hdl_alloc(hdl
,
66 sizeof (struct except_list
), FMD_SLEEP
);
67 elem
->el_fault
= fmd_hdl_strdup(hdl
, p
, FMD_SLEEP
);
68 elem
->el_next
= except_list
;
/*
 * Abort the module if len no longer equals strlen(estr).  Presumably len is
 * the length of estr recorded before parsing, making this a check that the
 * input string was left intact -- confirm against the full source.
 */
79 if (len
!= strlen(estr
)) {
80 fmd_hdl_abort(hdl
, "Error parsing exception list: %s\n", estr
);
86 * 1 if fault on exception list
/*
 * fault_exception: test a fault against the exception list.
 *
 *	hdl	fmd module handle.
 *	fault	fault nvlist to test.
 *
 * Walks except_list and compares the fault with each element's el_fault
 * using fmd_nvl_class_match(); a match is reported through fmd_hdl_debug().
 * rio_recv() uses the result as a condition.  The return statements are not
 * visible in this view; the surviving comment fragment says the result is 1
 * when the fault is on the exception list.
 */
90 fault_exception(fmd_hdl_t
*hdl
, nvlist_t
*fault
)
92 struct except_list
*elem
;
94 for (elem
= except_list
; elem
; elem
= elem
->el_next
) {
95 if (fmd_nvl_class_match(hdl
, fault
, elem
->el_fault
)) {
96 fmd_hdl_debug(hdl
, "rio_recv: Skipping fault "
97 "on exception list (%s)\n", elem
->el_fault
);
/*
 * free_exception_list: release every element of except_list.
 *
 *	hdl	fmd module handle that owns the allocations.
 *
 * Loops until except_list is NULL.  Each pass advances the head to
 * elem->el_next, then frees the element's el_fault string with
 * fmd_hdl_strfree() and the element itself with fmd_hdl_free().
 *
 * NOTE(review): the statement that sets elem is not visible in this view;
 * presumably it is the current list head -- confirm.
 */
106 free_exception_list(fmd_hdl_t
*hdl
)
108 struct except_list
*elem
;
110 while (except_list
) {
112 except_list
= elem
->el_next
;
113 fmd_hdl_strfree(hdl
, elem
->el_fault
);
114 fmd_hdl_free(hdl
, elem
, sizeof (*elem
));
/*
 * rio_recv: event receive entry point, installed as fmdo_recv in fmd_ops.
 *
 *	hdl	fmd module handle.
 *	ep	fmd event handle; not referenced in the visible code.
 *	nvl	event payload; the fault array (FM_SUSPECT_FAULT_LIST) and the
 *		case uuid (FM_SUSPECT_UUID) are looked up in it.
 *	class	event class string, compared with the FM_LIST_*_CLASS names
 *		and with the snglfault prefix.
 *
 * Visible outline:
 *   1. With global_disable set, FM_LIST_SUSPECT_CLASS and
 *	FM_LIST_UPDATED_CLASS events are logged as "retire disabled".
 *   2. The di_retire_t callbacks are pointed at the fmd abort/debug routines.
 *   3. The class is dispatched; other classes are logged as not list.*.
 *   4. Two passes are made over the faults.  Both skip faults whose
 *	FM_SUSPECT_RETIRE value is false, that have no ASRU, whose ASRU
 *	scheme is not FM_FMRI_SCHEME_DEV, that are on the exception list, or
 *	that have no device path.  The first pass calls di_retire_device()
 *	or di_unretire_device() depending on fmd_nvl_fmri_has_fault(); the
 *	second retires again, for FM_LIST_UPDATED_CLASS events only.
 *   5. For FM_LIST_SUSPECT_CLASS with rval == 0 the case is closed with
 *	fmd_case_uuclose() unless fmd_case_uuclosed() already reports it
 *	closed; for FM_LIST_REPAIRED_CLASS with rval == 0 it is resolved
 *	with fmd_case_uuresolved().
 *
 * NOTE(review): many statements (several declarations, the bodies of the
 * class dispatch, every return/continue, and the updates of rval and
 * valid_suspect) are not visible in this view, so the outline covers only
 * what is shown.
 */
121 rio_recv(fmd_hdl_t
*hdl
, fmd_event_t
*ep
, nvlist_t
*nvl
, const char *class)
123 nvlist_t
**faults
= NULL
;
130 di_retire_t drt
= {0};
133 int valid_suspect
= 0;
135 char *snglfault
= FM_FAULT_CLASS
"."FM_ERROR_IO
".";
140 * If disabled, we don't do retire. We still do unretires though
142 if (global_disable
&& (strcmp(class, FM_LIST_SUSPECT_CLASS
) == 0 ||
143 strcmp(class, FM_LIST_UPDATED_CLASS
) == 0)) {
144 fmd_hdl_debug(hdl
, "rio_recv: retire disabled\n");
/*
 * Point the di_retire_t abort and debug callbacks at fmd_hdl_abort() and
 * fmd_hdl_debug(), cast to the callback signature.
 */
148 drt
.rt_abort
= (void (*)(void *, const char *, ...))fmd_hdl_abort
;
149 drt
.rt_debug
= (void (*)(void *, const char *, ...))fmd_hdl_debug
;
/*
 * Dispatch on the event class.  The body of each branch is not visible in
 * this view.
 */
152 if (strcmp(class, FM_LIST_SUSPECT_CLASS
) == 0) {
154 } else if (strcmp(class, FM_LIST_REPAIRED_CLASS
) == 0) {
156 } else if (strcmp(class, FM_LIST_UPDATED_CLASS
) == 0) {
158 } else if (strcmp(class, FM_LIST_RESOLVED_CLASS
) == 0) {
160 } else if (strncmp(class, snglfault
, strlen(snglfault
)) == 0) {
165 fmd_hdl_debug(hdl
, "rio_recv: not list.* class: %s\n", class);
/*
 * Look up the fault array in the event only while nfaults is still 0.
 * Presumably the single-fault branch above sets faults/nfaults itself --
 * confirm against the full source.
 */
169 if (nfaults
== 0 && nvlist_lookup_nvlist_array(nvl
,
170 FM_SUSPECT_FAULT_LIST
, &faults
, &nfaults
) != 0) {
171 fmd_hdl_debug(hdl
, "rio_recv: no fault list");
/* First pass over the faults: filter each one, then retire or unretire. */
175 for (f
= 0; f
< nfaults
; f
++) {
176 if (nvlist_lookup_boolean_value(faults
[f
], FM_SUSPECT_RETIRE
,
177 &rtr
) == 0 && !rtr
) {
178 fmd_hdl_debug(hdl
, "rio_recv: retire suppressed");
182 if (nvlist_lookup_nvlist(faults
[f
], FM_FAULT_ASRU
,
184 fmd_hdl_debug(hdl
, "rio_recv: no asru in fault");
189 if (nvlist_lookup_string(asru
, FM_FMRI_SCHEME
, &scheme
) != 0 ||
190 strcmp(scheme
, FM_FMRI_SCHEME_DEV
) != 0) {
191 fmd_hdl_debug(hdl
, "rio_recv: not \"dev\" scheme: %s",
192 scheme
? scheme
: "<NULL>");
196 if (fault_exception(hdl
, faults
[f
]))
199 if (nvlist_lookup_string(asru
, FM_FMRI_DEV_PATH
,
200 &path
) != 0 || path
[0] == '\0') {
201 fmd_hdl_debug(hdl
, "rio_recv: no dev path in asru");
/*
 * Retire the device when fmd_nvl_fmri_has_fault() returns 1 for the ASRU.
 * The condition choosing between this and the unretire case below is not
 * visible in this view.
 */
207 if (fmd_nvl_fmri_has_fault(hdl
, asru
,
208 FMD_HAS_FAULT_ASRU
, NULL
) == 1) {
209 error
= di_retire_device(path
, &drt
, 0);
211 fmd_hdl_debug(hdl
, "rio_recv:"
212 " di_retire_device failed:"
213 " error: %d %s", error
, path
);
/* Unretire the device when fmd_nvl_fmri_has_fault() returns 0 for the ASRU. */
218 if (fmd_nvl_fmri_has_fault(hdl
, asru
,
219 FMD_HAS_FAULT_ASRU
, NULL
) == 0) {
220 error
= di_unretire_device(path
, &drt
);
222 fmd_hdl_debug(hdl
, "rio_recv:"
223 " di_unretire_device failed:"
224 " error: %d %s", error
, path
);
231 * Run through again to handle new faults in a list.updated.
233 for (f
= 0; f
< nfaults
; f
++) {
234 if (nvlist_lookup_boolean_value(faults
[f
], FM_SUSPECT_RETIRE
,
235 &rtr
) == 0 && !rtr
) {
236 fmd_hdl_debug(hdl
, "rio_recv: retire suppressed");
240 if (nvlist_lookup_nvlist(faults
[f
], FM_FAULT_ASRU
,
242 fmd_hdl_debug(hdl
, "rio_recv: no asru in fault");
247 if (nvlist_lookup_string(asru
, FM_FMRI_SCHEME
, &scheme
) != 0 ||
248 strcmp(scheme
, FM_FMRI_SCHEME_DEV
) != 0) {
249 fmd_hdl_debug(hdl
, "rio_recv: not \"dev\" scheme: %s",
250 scheme
? scheme
: "<NULL>");
254 if (fault_exception(hdl
, faults
[f
]))
257 if (nvlist_lookup_string(asru
, FM_FMRI_DEV_PATH
,
258 &path
) != 0 || path
[0] == '\0') {
259 fmd_hdl_debug(hdl
, "rio_recv: no dev path in asru");
/* In this pass only FM_LIST_UPDATED_CLASS events lead to a retire. */
263 if (strcmp(class, FM_LIST_UPDATED_CLASS
) == 0) {
264 if (fmd_nvl_fmri_has_fault(hdl
, asru
,
265 FMD_HAS_FAULT_ASRU
, NULL
) == 1) {
266 error
= di_retire_device(path
, &drt
, 0);
268 fmd_hdl_debug(hdl
, "rio_recv:"
269 " di_retire_device failed:"
270 " error: %d %s", error
, path
);
277 * Don't send uuclose or uuresolved unless at least one suspect
278 * was valid for this retire agent and no retires/unretires failed.
280 if (valid_suspect
== 0)
284 * The fmd framework takes care of moving a case to the repaired
285 * state. To move the case to the closed state however, we (the
286 * retire agent) need to call fmd_case_uuclose()
288 if (strcmp(class, FM_LIST_SUSPECT_CLASS
) == 0 && rval
== 0) {
289 if (nvlist_lookup_string(nvl
, FM_SUSPECT_UUID
, &uuid
) == 0 &&
290 !fmd_case_uuclosed(hdl
, uuid
)) {
291 fmd_case_uuclose(hdl
, uuid
);
296 * Similarly to move the case to the resolved state, we (the
297 * retire agent) need to call fmd_case_uuresolved()
299 if (strcmp(class, FM_LIST_REPAIRED_CLASS
) == 0 && rval
== 0 &&
300 nvlist_lookup_string(nvl
, FM_SUSPECT_UUID
, &uuid
) == 0)
301 fmd_case_uuresolved(hdl
, uuid
);
/*
 * Module entry-point table referenced from fmd_info.  Only the receive
 * entry point is implemented (rio_recv); the timeout, close and stats slots
 * are NULL.  Any entries after fmdo_stats are not visible in this view.
 */
304 static const fmd_hdl_ops_t fmd_ops
= {
305 rio_recv
, /* fmdo_recv */
306 NULL
, /* fmdo_timeout */
307 NULL
, /* fmdo_close */
308 NULL
, /* fmdo_stats */
/*
 * Module properties, read in _fmd_init():
 *	"global-disable"	boolean, default "false"; stored in
 *				global_disable.
 *	"fault-exceptions"	string, no default (NULL); when set it is
 *				passed to parse_exception_string().
 * The end of the table is not visible in this view.
 */
312 static const fmd_prop_t rio_props
[] = {
313 { "global-disable", FMD_TYPE_BOOL
, "false" },
314 { "fault-exceptions", FMD_TYPE_STRING
, NULL
},
/*
 * Module information handed to fmd_hdl_register() in _fmd_init(): two
 * strings (presumably the module description and version -- confirm
 * against fmd_hdl_info_t), the fmd_ops entry-point table and the rio_props
 * property table.
 */
318 static const fmd_hdl_info_t fmd_info
= {
319 "I/O Retire Agent", "2.0", &fmd_ops
, rio_props
/*
 * _fmd_init: module initialization.
 *
 *	hdl	fmd module handle.
 *
 * Registers the module with fmd_hdl_register() using fmd_info, logging a
 * debug message if registration fails (the action taken after the message
 * is not visible in this view).  It then loads global_disable from the
 * "global-disable" property and, if the "fault-exceptions" property is set,
 * parses it into the exception list.
 */
323 _fmd_init(fmd_hdl_t
*hdl
)
328 if (fmd_hdl_register(hdl
, FMD_API_VERSION
, &fmd_info
) != 0) {
329 fmd_hdl_debug(hdl
, "failed to register handle\n");
333 global_disable
= fmd_prop_get_int32(hdl
, "global-disable");
/*
 * Parse a private copy of the property string: the value returned by
 * fmd_prop_get_string() is released before parsing, and the copy is freed
 * once parse_exception_string() has returned.
 */
336 if (estr
= fmd_prop_get_string(hdl
, "fault-exceptions")) {
337 estrdup
= fmd_hdl_strdup(hdl
, estr
, FMD_SLEEP
);
338 fmd_prop_free_string(hdl
, estr
);
339 parse_exception_string(hdl
, estrdup
);
340 fmd_hdl_strfree(hdl
, estrdup
);
/*
 * _fmd_fini: module teardown.  Releases the exception list through
 * free_exception_list().
 *
 *	hdl	fmd module handle.
 */
345 _fmd_fini(fmd_hdl_t
*hdl
)
347 free_exception_list(hdl
);