4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/fm/protocol.h>
28 #include <uuid/uuid.h>
35 #include <fm/libtopo.h>
37 #include <fmd_alloc.h>
38 #include <fmd_string.h>
39 #include <fmd_error.h>
41 #include <fmd_protocol.h>
42 #include <fmd_event.h>
45 #include <fmd_dispq.h>
47 #include <fmd_module.h>
/*
 * Resource event class names, one per combination of the UNUSABLE and
 * FAULTED bits. Each entry is FMD_RSRC_CLASS concatenated with a state
 * suffix; the per-entry comments give the bit combination it stands for.
 */
52 static const char *const _fmd_asru_events
[] = {
53 FMD_RSRC_CLASS
"asru.ok", /* UNUSABLE=0 FAULTED=0 */
54 FMD_RSRC_CLASS
"asru.degraded", /* UNUSABLE=0 FAULTED=1 */
55 FMD_RSRC_CLASS
"asru.unknown", /* UNUSABLE=1 FAULTED=0 */
56 FMD_RSRC_CLASS
"asru.faulted" /* UNUSABLE=1 FAULTED=1 */
/*
 * Short two-letter state names in the same order as the event class table.
 * The recreate trace message in this file indexes this table with
 * (asru_flags & FMD_ASRU_STATE).
 */
59 static const char *const _fmd_asru_snames
[] = {
60 "uf", "uF", "Uf", "UF" /* same order as above */
/*
 * Override for replacement-state checks. fmd_asru_hash_create() loads it
 * from the "fakenotpresent" configuration property, and when it is nonzero
 * fmd_asru_replacement_state() returns its value directly.
 */
63 volatile uint32_t fmd_asru_fake_not_present
= 0;
/*
 * Map the string 'val' to a bucket index: hash it with topo_fmri_strhash()
 * using the topo handle held by 'ahp', then reduce modulo ah_hashlen.
 */
66 fmd_asru_strhash(fmd_asru_hash_t
*ahp
, const char *val
)
68 return (topo_fmri_strhash(ahp
->ah_topo
->ft_hdl
, val
) % ahp
->ah_hashlen
);
/*
 * Compare strings 'a' and 'b' with topo_fmri_strcmp() using the topo handle
 * held by 'ahp', and return its result unchanged. The chain walks in this
 * file treat a nonzero result as a match.
 */
72 fmd_asru_strcmp(fmd_asru_hash_t
*ahp
, const char *a
, const char *b
)
74 return (topo_fmri_strcmp(ahp
->ah_topo
->ft_hdl
, a
, b
));
/*
 * Allocate and initialise a zero-filled fmd_asru_t.
 *
 * ahp  - hash whose ah_dirpath is copied into asru_root
 * uuid - copied into asru_uuid; asru_uuidlen is its length, or 0 if the
 *        copy is NULL
 * name - copied into asru_name
 * fmri - duplicated into asru_fmri; if its scheme string equals
 *        FM_FMRI_SCHEME_FMD the asru is flagged FMD_ASRU_INTERNAL
 *
 * NOTE(review): the embedded numbering skips lines here (e.g. 82, 88, 93,
 * 98-101), so the declaration of 's', any guard around the nvlist_xdup()
 * call, the initial reference count and the return are not visible --
 * confirm against the complete file.
 */
78 fmd_asru_create(fmd_asru_hash_t
*ahp
, const char *uuid
,
79 const char *name
, nvlist_t
*fmri
)
81 fmd_asru_t
*ap
= fmd_zalloc(sizeof (fmd_asru_t
), FMD_SLEEP
);
84 (void) pthread_mutex_init(&ap
->asru_lock
, NULL
);
85 (void) pthread_cond_init(&ap
->asru_cv
, NULL
);
87 ap
->asru_name
= fmd_strdup(name
, FMD_SLEEP
);
89 (void) nvlist_xdup(fmri
, &ap
->asru_fmri
, &fmd
.d_nva
);
90 ap
->asru_root
= fmd_strdup(ahp
->ah_dirpath
, FMD_SLEEP
);
91 ap
->asru_uuid
= fmd_strdup(uuid
, FMD_SLEEP
);
92 ap
->asru_uuidlen
= ap
->asru_uuid
? strlen(ap
->asru_uuid
) : 0;
/* An FMRI in the fmd scheme marks the asru as internal. */
95 if (fmri
&& nvlist_lookup_string(fmri
, FM_FMRI_SCHEME
, &s
) == 0 &&
96 strcmp(s
, FM_FMRI_SCHEME_FMD
) == 0)
97 ap
->asru_flags
|= FMD_ASRU_INTERNAL
;
/*
 * Free an fmd_asru_t and everything it owns: the event and FMRI nvlists,
 * the name and root strings, the uuid buffer (asru_uuidlen + 1 bytes) and
 * finally the structure itself. The assertions require asru_lock to be
 * held and the reference count to be zero on entry.
 */
103 fmd_asru_destroy(fmd_asru_t
*ap
)
105 ASSERT(MUTEX_HELD(&ap
->asru_lock
));
106 ASSERT(ap
->asru_refs
== 0);
108 nvlist_free(ap
->asru_event
);
109 fmd_strfree(ap
->asru_name
);
110 nvlist_free(ap
->asru_fmri
);
111 fmd_strfree(ap
->asru_root
);
112 fmd_free(ap
->asru_uuid
, ap
->asru_uuidlen
+ 1);
113 fmd_free(ap
, sizeof (fmd_asru_t
));
/*
 * Push 'ap' onto the head of the ah_hash bucket chain selected by hashing
 * ap->asru_name. The assertion requires ah_lock to be held for writing.
 *
 * NOTE(review): embedded line 124 is not visible; any count update it may
 * perform cannot be confirmed from here.
 */
117 fmd_asru_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_t
*ap
)
119 uint_t h
= fmd_asru_strhash(ahp
, ap
->asru_name
);
121 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
122 ap
->asru_next
= ahp
->ah_hash
[h
];
123 ahp
->ah_hash
[h
] = ap
;
/*
 * Take asru_lock, assert that asru_refs is nonzero, and drop the lock.
 * Callers in this file store the return value as an fmd_asru_t pointer.
 *
 * NOTE(review): embedded line 131 (between the lock and the assertion) and
 * the return statement are not visible; presumably the reference count is
 * incremented there -- confirm against the complete file.
 */
128 fmd_asru_hold(fmd_asru_t
*ap
)
130 (void) pthread_mutex_lock(&ap
->asru_lock
);
132 ASSERT(ap
->asru_refs
!= 0);
133 (void) pthread_mutex_unlock(&ap
->asru_lock
);
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is
139 * not found, no entry is created and NULL is returned. This internal function
140 * is for callers who have the ah_lock held and is used by lookup_name below.
/*
 * Walk the ah_hash bucket chain selected by hashing 'name', comparing each
 * entry's asru_name with fmd_asru_strcmp(). The visible code also places a
 * hold on an asru and sets the fmd errno to EFMD_ASRU_NOENT; the assertion
 * requires ah_lock to be held (reader or writer).
 *
 * NOTE(review): the lines that select between the hold and the errno path,
 * and the return, are not visible (embedded lines 153-156, 158, 160-161).
 */
143 fmd_asru_hash_lookup(fmd_asru_hash_t
*ahp
, const char *name
)
148 ASSERT(RW_LOCK_HELD(&ahp
->ah_lock
));
149 h
= fmd_asru_strhash(ahp
, name
);
151 for (ap
= ahp
->ah_hash
[h
]; ap
!= NULL
; ap
= ap
->asru_next
) {
152 if (fmd_asru_strcmp(ahp
, ap
->asru_name
, name
))
157 (void) fmd_asru_hold(ap
);
159 (void) fmd_set_errno(EFMD_ASRU_NOENT
);
/*
 * Values for the 'hc_only' argument of fmd_asru_replacement_state(), which
 * compares that argument against HC_ONLY_FALSE.
 */
164 #define HC_ONLY_FALSE 0
165 #define HC_ONLY_TRUE 1
/*
 * Work out the replacement state of the object named by a fault event.
 *
 * event   - fault nvlist; its FM_FAULT_ASRU, FM_FAULT_RESOURCE and
 *           FM_FAULT_FRU members are consulted in that order
 * hc_only - when not HC_ONLY_FALSE, an FMRI is only consulted if its
 *           FM_FMRI_SCHEME string equals FM_FMRI_SCHEME_HC
 *
 * If fmd_asru_fake_not_present is nonzero its value is returned at once.
 * Otherwise fmd_fmri_replaced() is asked about each FMRI in turn, and the
 * resource and FRU are only tried while the state so far is -1 or
 * FMD_OBJ_STATE_UNKNOWN.
 *
 * NOTE(review): several embedded lines are missing (e.g. 169-170, 199,
 * 201, 206-209, 215, 217, 222-226, 228-229), including the declarations
 * of 'ps' and 's', the branches that choose between the two
 * fmd_fmri_replaced() calls per FMRI, and the return.
 */
168 fmd_asru_replacement_state(nvlist_t
*event
, int hc_only
)
171 nvlist_t
*asru
, *fru
, *rsrc
;
175 * Check if there is evidence that this object is no longer present.
176 * In general fmd_fmri_present() should be supported on resources and/or
177 * frus, as those are the things that are physically present or not
178 * present - an asru can be spread over a number of frus some of which
179 * are present and some not, so fmd_fmri_present() is not generally
180 * meaningful. However retain a check for asru first for compatibility.
181 * If we have checked all three and we still get -1 then nothing knows
182 * whether it's present or not, so err on the safe side and treat it
185 * Note that if hc_only is set, then we only check status using fmris
186 * that are in hc-scheme.
/* Test hook: a nonzero override short-circuits every FMRI check. */
188 if (fmd_asru_fake_not_present
)
189 return (fmd_asru_fake_not_present
);
/* First choice: the ASRU FMRI. */
190 if (nvlist_lookup_nvlist(event
, FM_FAULT_ASRU
, &asru
) == 0 &&
191 (hc_only
== HC_ONLY_FALSE
|| (nvlist_lookup_string(asru
,
192 FM_FMRI_SCHEME
, &s
) == 0 && strcmp(s
, FM_FMRI_SCHEME_HC
) == 0)))
193 ps
= fmd_fmri_replaced(asru
);
/* Second choice: the resource FMRI, only if the state is still unknown. */
194 if (ps
== -1 || ps
== FMD_OBJ_STATE_UNKNOWN
) {
195 if (nvlist_lookup_nvlist(event
, FM_FAULT_RESOURCE
,
196 &rsrc
) == 0 && (hc_only
== HC_ONLY_FALSE
||
197 (nvlist_lookup_string(rsrc
, FM_FMRI_SCHEME
, &s
) == 0 &&
198 strcmp(s
, FM_FMRI_SCHEME_HC
) == 0))) {
200 ps
= fmd_fmri_replaced(rsrc
);
202 /* see if we can improve on UNKNOWN */
203 int ps2
= fmd_fmri_replaced(rsrc
);
204 if (ps2
== FMD_OBJ_STATE_STILL_PRESENT
||
205 ps2
== FMD_OBJ_STATE_REPLACED
)
/* Last choice: the FRU FMRI, again only if the state is still unknown. */
210 if (ps
== -1 || ps
== FMD_OBJ_STATE_UNKNOWN
) {
211 if (nvlist_lookup_nvlist(event
, FM_FAULT_FRU
, &fru
) == 0 &&
212 (hc_only
== HC_ONLY_FALSE
|| (nvlist_lookup_string(fru
,
213 FM_FMRI_SCHEME
, &s
) == 0 &&
214 strcmp(s
, FM_FMRI_SCHEME_HC
) == 0))) {
216 ps
= fmd_fmri_replaced(fru
);
218 /* see if we can improve on UNKNOWN */
219 int ps2
= fmd_fmri_replaced(fru
);
220 if (ps2
== FMD_OBJ_STATE_STILL_PRESENT
||
221 ps2
== FMD_OBJ_STATE_REPLACED
)
227 ps
= FMD_OBJ_STATE_UNKNOWN
;
/*
 * Push link 'alp' onto the head of the ah_asru_hash bucket chain selected
 * by hashing 'name', linking through al_asru_next. The assertion requires
 * ah_lock to be held for writing.
 *
 * NOTE(review): the line declaring the 'name' parameter (embedded 233) is
 * not visible.
 */
232 fmd_asru_asru_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
,
235 uint_t h
= fmd_asru_strhash(ahp
, name
);
237 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
238 alp
->al_asru_next
= ahp
->ah_asru_hash
[h
];
239 ahp
->ah_asru_hash
[h
] = alp
;
/*
 * Push link 'alp' onto the head of the ah_case_hash bucket chain selected
 * by hashing 'name', linking through al_case_next. The assertion requires
 * ah_lock to be held for writing.
 *
 * NOTE(review): the line declaring the 'name' parameter (embedded 245) and
 * embedded line 252 are not visible.
 */
244 fmd_asru_case_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
,
247 uint_t h
= fmd_asru_strhash(ahp
, name
);
249 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
250 alp
->al_case_next
= ahp
->ah_case_hash
[h
];
251 ahp
->ah_case_hash
[h
] = alp
;
/*
 * Push link 'alp' onto the head of the ah_fru_hash bucket chain selected
 * by hashing 'name', linking through al_fru_next. The assertion requires
 * ah_lock to be held for writing.
 */
255 fmd_asru_fru_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
, char *name
)
257 uint_t h
= fmd_asru_strhash(ahp
, name
);
259 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
260 alp
->al_fru_next
= ahp
->ah_fru_hash
[h
];
261 ahp
->ah_fru_hash
[h
] = alp
;
/*
 * Push link 'alp' onto the head of the ah_label_hash bucket chain selected
 * by hashing 'name', linking through al_label_next. The assertion requires
 * ah_lock to be held for writing.
 *
 * NOTE(review): the line declaring the 'name' parameter (embedded 266) is
 * not visible.
 */
265 fmd_asru_label_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
,
268 uint_t h
= fmd_asru_strhash(ahp
, name
);
270 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
271 alp
->al_label_next
= ahp
->ah_label_hash
[h
];
272 ahp
->ah_label_hash
[h
] = alp
;
/*
 * Push link 'alp' onto the head of the ah_rsrc_hash bucket chain selected
 * by hashing 'name', linking through al_rsrc_next. The assertion requires
 * ah_lock to be held for writing.
 *
 * NOTE(review): the line declaring the 'name' parameter (embedded 277) is
 * not visible.
 */
276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
,
279 uint_t h
= fmd_asru_strhash(ahp
, name
);
281 ASSERT(RW_WRITE_HELD(&ahp
->ah_lock
));
282 alp
->al_rsrc_next
= ahp
->ah_rsrc_hash
[h
];
283 ahp
->ah_rsrc_hash
[h
] = alp
;
/*
 * Free a link structure and everything it owns: release its log if one is
 * attached, then free the uuid buffer (al_uuidlen + 1 bytes), the event
 * and ASRU FMRI nvlists, the rsrc/case/fru/asru/label strings and finally
 * the structure itself. The assertions require al_refs to be zero and the
 * owning asru's asru_lock to be held.
 */
287 fmd_asru_al_destroy(fmd_asru_link_t
*alp
)
289 ASSERT(alp
->al_refs
== 0);
290 ASSERT(MUTEX_HELD(&alp
->al_asru
->asru_lock
));
292 if (alp
->al_log
!= NULL
)
293 fmd_log_rele(alp
->al_log
);
295 fmd_free(alp
->al_uuid
, alp
->al_uuidlen
+ 1);
296 nvlist_free(alp
->al_event
);
297 fmd_strfree(alp
->al_rsrc_name
);
298 fmd_strfree(alp
->al_case_uuid
);
299 fmd_strfree(alp
->al_fru_name
);
300 fmd_strfree(alp
->al_asru_name
);
301 fmd_strfree(alp
->al_label
);
302 nvlist_free(alp
->al_asru_fmri
);
303 fmd_free(alp
, sizeof (fmd_asru_link_t
));
/*
 * Under the owning asru's asru_lock, assert that al_refs is nonzero.
 * Declared to return an fmd_asru_link_t pointer, which the apply routines
 * store in their snapshot arrays.
 *
 * NOTE(review): embedded lines 312-313 and the return are not visible;
 * presumably the reference counts are incremented there -- confirm against
 * the complete file.
 */
306 static fmd_asru_link_t
*
307 fmd_asru_al_hold(fmd_asru_link_t
*alp
)
309 fmd_asru_t
*ap
= alp
->al_asru
;
311 (void) pthread_mutex_lock(&ap
->asru_lock
);
314 ASSERT(alp
->al_refs
!= 0);
315 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/* Forward declaration; fmd_asru_al_hash_release() calls fmd_asru_destroy(). */
319 static void fmd_asru_destroy(fmd_asru_t
*ap
);
/*
 * Drop one reference on link 'alp' and one on its owning asru, both under
 * the asru's asru_lock. The link is destroyed when al_refs reaches zero
 * and the asru is destroyed when asru_refs reaches zero.
 *
 * NOTE(review): embedded line 334, between the fmd_asru_destroy() call and
 * the unlock, is not visible. Whether the unlock is skipped after the asru
 * has been destroyed cannot be determined from here -- confirm.
 */
323 fmd_asru_al_hash_release(fmd_asru_hash_t
*ahp
, fmd_asru_link_t
*alp
)
325 fmd_asru_t
*ap
= alp
->al_asru
;
327 (void) pthread_mutex_lock(&ap
->asru_lock
);
328 ASSERT(alp
->al_refs
!= 0);
329 if (--alp
->al_refs
== 0)
330 fmd_asru_al_destroy(alp
);
331 ASSERT(ap
->asru_refs
!= 0);
332 if (--ap
->asru_refs
== 0)
333 fmd_asru_destroy(ap
);
335 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/*
 * Convert the FMRI nvlist 'nvl' to a newly allocated string.
 *
 * The first fmd_fmri_nvl2str() call, with a NULL buffer, yields the length,
 * which is stored in *namelen. A buffer of *namelen + 1 bytes is then
 * allocated into *name and filled by a second call. EFMD_ASRU_FMRI is
 * returned if either call yields -1, and the buffer is freed in the second
 * case. Callers in this file compare the result against 0.
 *
 * NOTE(review): the success return (after embedded line 347) is not visible.
 */
339 fmd_asru_get_namestr(nvlist_t
*nvl
, char **name
, ssize_t
*namelen
)
341 if ((*namelen
= fmd_fmri_nvl2str(nvl
, NULL
, 0)) == -1)
342 return (EFMD_ASRU_FMRI
);
343 *name
= fmd_alloc(*namelen
+ 1, FMD_SLEEP
);
344 if (fmd_fmri_nvl2str(nvl
, *name
, *namelen
+ 1) == -1) {
346 fmd_free(*name
, *namelen
+ 1);
347 return (EFMD_ASRU_FMRI
);
/*
 * Create the per-fault link structure for fault event 'nvl' of case 'cp'
 * and enter it into the resource cache.
 *
 * Visible steps:
 *  - derive name strings for the ASRU, FRU and resource FMRIs in 'nvl' and
 *    look up the FM_FAULT_LOCATION label;
 *  - under ah_lock held as writer: allocate the link, duplicate the ASRU
 *    FMRI and uuid into it, find or create the per-asru structure keyed by
 *    the ASRU name ("" when there is none), append the link to the asru's
 *    list and insert it into the asru, fru, rsrc, label and case hashes;
 *  - take asru_lock, drop ah_lock, record the case and a copy of the event,
 *    mark the asru FMD_ASRU_VALID and broadcast asru_cv.
 *
 * NOTE(review): many embedded lines are missing (e.g. 354, 361-362, 367,
 * 370, 373, 375-376, 392-393, 401, 403, 406, 425, 428, 434-435). These
 * hold the fourth parameter (used below as 'al_uuid'), the declarations of
 * 'ap' and 'msg', the assignments to got_asru/got_fru/got_rsrc, the
 * fallback when no location label exists, and the return.
 */
352 static fmd_asru_link_t
*
353 fmd_asru_al_create(fmd_asru_hash_t
*ahp
, nvlist_t
*nvl
, fmd_case_t
*cp
,
356 nvlist_t
*asru
= NULL
, *fru
, *rsrc
;
357 int got_rsrc
= 0, got_asru
= 0, got_fru
= 0;
358 ssize_t fru_namelen
, rsrc_namelen
, asru_namelen
;
359 char *asru_name
, *rsrc_name
, *fru_name
, *name
, *label
;
360 fmd_asru_link_t
*alp
;
363 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
/* Derive string names for whichever of ASRU, FRU and resource exist. */
365 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_ASRU
, &asru
) == 0 &&
366 fmd_asru_get_namestr(asru
, &asru_name
, &asru_namelen
) == 0)
368 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_FRU
, &fru
) == 0 &&
369 fmd_asru_get_namestr(fru
, &fru_name
, &fru_namelen
) == 0)
371 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_RESOURCE
, &rsrc
) == 0 &&
372 fmd_asru_get_namestr(rsrc
, &rsrc_name
, &rsrc_namelen
) == 0)
374 if (nvlist_lookup_string(nvl
, FM_FAULT_LOCATION
, &label
) != 0)
378 * Grab the rwlock as a writer; Then create and insert the asru with
379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
380 * proceed to initializing the asru.
382 (void) pthread_rwlock_wrlock(&ahp
->ah_lock
);
385 * Create and initialise the per-fault "link" structure.
387 alp
= fmd_zalloc(sizeof (fmd_asru_link_t
), FMD_SLEEP
);
389 (void) nvlist_xdup(asru
, &alp
->al_asru_fmri
, &fmd
.d_nva
);
390 alp
->al_uuid
= fmd_strdup(al_uuid
, FMD_SLEEP
);
391 alp
->al_uuidlen
= strlen(alp
->al_uuid
);
395 * If this is the first fault for this asru, then create the per-asru
396 * structure and link into the hash.
398 name
= got_asru
? asru_name
: "";
399 if ((ap
= fmd_asru_hash_lookup(ahp
, name
)) == NULL
) {
400 ap
= fmd_asru_create(ahp
, al_uuid
, name
, got_asru
? asru
:
402 fmd_asru_hash_insert(ahp
, ap
);
404 nvlist_free(ap
->asru_event
);
405 (void) nvlist_xdup(nvl
, &ap
->asru_event
, &fmd
.d_nva
);
408 * Put the link structure on the list associated with the per-asru
409 * structure. Then put the link structure on the various hashes.
411 fmd_list_append(&ap
->asru_list
, (fmd_list_t
*)alp
);
/* The link takes the name strings obtained above, or "" copies. */
413 alp
->al_asru_name
= got_asru
? asru_name
: fmd_strdup("", FMD_SLEEP
);
414 fmd_asru_asru_hash_insert(ahp
, alp
, alp
->al_asru_name
);
415 alp
->al_fru_name
= got_fru
? fru_name
: fmd_strdup("", FMD_SLEEP
);
416 fmd_asru_fru_hash_insert(ahp
, alp
, alp
->al_fru_name
);
417 alp
->al_rsrc_name
= got_rsrc
? rsrc_name
: fmd_strdup("", FMD_SLEEP
);
418 fmd_asru_rsrc_hash_insert(ahp
, alp
, alp
->al_rsrc_name
);
419 alp
->al_label
= fmd_strdup(label
, FMD_SLEEP
);
420 fmd_asru_label_hash_insert(ahp
, alp
, label
);
421 alp
->al_case_uuid
= fmd_strdup(cip
->ci_uuid
, FMD_SLEEP
);
422 fmd_asru_case_hash_insert(ahp
, alp
, cip
->ci_uuid
);
/* asru_lock is taken before ah_lock is released. */
423 (void) pthread_mutex_lock(&ap
->asru_lock
);
424 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
426 ap
->asru_case
= alp
->al_case
= cp
;
427 if (nvlist_lookup_boolean_value(nvl
, FM_SUSPECT_MESSAGE
, &msg
) == 0 &&
429 ap
->asru_flags
|= FMD_ASRU_INVISIBLE
;
430 (void) nvlist_xdup(nvl
, &alp
->al_event
, &fmd
.d_nva
);
431 ap
->asru_flags
|= FMD_ASRU_VALID
;
432 (void) pthread_cond_broadcast(&ap
->asru_cv
);
433 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/*
 * Log-replay callback (fmd_asru_hash_logopen() passes it to
 * fmd_log_replay()): rebuild the cache entry and case state described by
 * one saved resource event.
 *
 * lp  - the log being replayed; its name doubles as the link uuid
 * ep  - the replayed event, whose nvlist supplies all the state
 * ahp - the hash to populate; ah_error is set on malformed records
 *
 * Visible steps: validate the record; read the uuid, code and state
 * booleans; recreate the case and copy injected/diag-time/DE attributes to
 * it; re-derive the ASRU from the resource through topology where
 * applicable; add the suspect to the case; create the cache link;
 * re-evaluate replacement and service state; set the link and asru flags
 * and the repair reason.
 *
 * NOTE(review): a large number of embedded lines are missing throughout
 * (declarations of cp, ap, ps, us, thp, rsrc, class, err, diag_time, nelem
 * and injected; the second operand lines of several lookups; the early
 * returns; the conditions guarding the flag and al_reason assignments).
 * The comments below describe only the visible statements.
 */
438 fmd_asru_hash_recreate(fmd_log_t
*lp
, fmd_event_t
*ep
, fmd_asru_hash_t
*ahp
)
440 nvlist_t
*nvl
= FMD_EVENT_NVL(ep
);
441 boolean_t faulty
= FMD_B_FALSE
, unusable
= FMD_B_FALSE
;
443 boolean_t repaired
= FMD_B_FALSE
, replaced
= FMD_B_FALSE
;
444 boolean_t acquitted
= FMD_B_FALSE
, resolved
= FMD_B_FALSE
;
445 nvlist_t
*flt
, *flt_copy
, *asru
;
446 char *case_uuid
= NULL
, *case_code
= NULL
;
448 fmd_asru_link_t
*alp
;
451 nvlist_t
*de_fmri
, *de_fmri_dup
;
460 * Extract the most recent values of 'faulty' from the event log.
462 if (nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_FAULTY
,
464 fmd_error(EFMD_ASRU_EVENT
, "failed to reload asru %s: "
465 "invalid event log record\n", lp
->log_name
);
466 ahp
->ah_error
= EFMD_ASRU_EVENT
;
/* A record without the embedded fault event is also rejected. */
469 if (nvlist_lookup_nvlist(nvl
, FM_RSRC_ASRU_EVENT
, &flt
) != 0) {
470 fmd_error(EFMD_ASRU_EVENT
, "failed to reload asru %s: "
471 "invalid event log record\n", lp
->log_name
);
472 ahp
->ah_error
= EFMD_ASRU_EVENT
;
/* The remaining members are optional: lookup results are discarded. */
475 (void) nvlist_lookup_string(nvl
, FM_RSRC_ASRU_UUID
, &case_uuid
);
476 (void) nvlist_lookup_string(nvl
, FM_RSRC_ASRU_CODE
, &case_code
);
477 (void) nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_UNUSABLE
,
479 (void) nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_REPAIRED
,
481 (void) nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_REPLACED
,
483 (void) nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_ACQUITTED
,
485 (void) nvlist_lookup_boolean_value(nvl
, FM_RSRC_ASRU_RESOLVED
,
489 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state
490 * (depending on whether the faulty/resolved bits are set).
491 * If the case is already present, fmd_case_recreate() will return it.
492 * If not, we'll create a new orphaned case. Either way, we use the
493 * ASRU event to insert a suspect into the partially-restored case.
495 fmd_module_lock(fmd
.d_rmod
);
496 cp
= fmd_case_recreate(fmd
.d_rmod
, NULL
, faulty
? FMD_CASE_CLOSED
:
497 resolved
? FMD_CASE_RESOLVED
: FMD_CASE_REPAIRED
, case_uuid
,
500 fmd_module_unlock(fmd
.d_rmod
);
501 if (nvlist_lookup_boolean_value(nvl
, FM_SUSPECT_INJECTED
,
502 &injected
) == 0 && injected
)
503 fmd_case_set_injected(cp
);
/* Use the recorded diagnosis time when it has at least two elements. */
504 if (nvlist_lookup_int64_array(nvl
, FM_SUSPECT_DIAG_TIME
, &diag_time
,
505 &nelem
) == 0 && nelem
>= 2)
506 fmd_case_settime(cp
, diag_time
[0], diag_time
[1]);
508 fmd_case_settime(cp
, lp
->log_stat
.st_ctime
, 0);
509 if (nvlist_lookup_nvlist(nvl
, FM_SUSPECT_DE
, &de_fmri
) == 0) {
510 (void) nvlist_xdup(de_fmri
, &de_fmri_dup
, &fmd
.d_nva
);
511 fmd_case_set_de_fmri(cp
, de_fmri_dup
);
513 (void) nvlist_xdup(flt
, &flt_copy
, &fmd
.d_nva
);
516 * For faults with a resource, re-evaluate the asru from the resource.
518 thp
= fmd_fmri_topo_hold(TOPO_VERSION
);
519 if (nvlist_lookup_string(flt_copy
, FM_CLASS
, &class) == 0 &&
520 strncmp(class, "fault", 5) == 0 &&
521 nvlist_lookup_nvlist(flt_copy
, FM_FAULT_RESOURCE
, &rsrc
) == 0 &&
523 (fmd_fmri_replaced(rsrc
) != FMD_OBJ_STATE_REPLACED
) &&
524 topo_fmri_asru(thp
, rsrc
, &asru
, &err
) == 0) {
/* Replace the saved ASRU with the one derived from topology. */
525 (void) nvlist_remove(flt_copy
, FM_FAULT_ASRU
, DATA_TYPE_NVLIST
);
526 (void) nvlist_add_nvlist(flt_copy
, FM_FAULT_ASRU
, asru
);
529 fmd_fmri_topo_rele(thp
);
531 (void) nvlist_xdup(flt_copy
, &flt
, &fmd
.d_nva
);
533 fmd_case_recreate_suspect(cp
, flt_copy
);
536 * Now create the resource cache entries.
538 alp
= fmd_asru_al_create(ahp
, flt
, cp
, fmd_strbasename(lp
->log_name
));
542 * Check to see if the resource is still present in the system.
544 ps
= fmd_asru_replacement_state(flt
, HC_ONLY_FALSE
);
545 if (ps
== FMD_OBJ_STATE_REPLACED
) {
546 replaced
= FMD_B_TRUE
;
547 } else if (ps
== FMD_OBJ_STATE_STILL_PRESENT
||
548 ps
== FMD_OBJ_STATE_UNKNOWN
) {
549 ap
->asru_flags
|= FMD_ASRU_PRESENT
;
550 if (nvlist_lookup_nvlist(alp
->al_event
, FM_FAULT_ASRU
,
/* Refresh 'unusable' from the current service state. */
554 switch (fmd_fmri_service_state(asru
)) {
555 case FMD_SERVICE_STATE_UNUSABLE
:
556 unusable
= FMD_B_TRUE
;
558 case FMD_SERVICE_STATE_OK
:
559 case FMD_SERVICE_STATE_ISOLATE_PENDING
:
560 case FMD_SERVICE_STATE_DEGRADED
:
561 unusable
= FMD_B_FALSE
;
563 case FMD_SERVICE_STATE_UNKNOWN
:
565 /* not supported by scheme */
566 us
= fmd_fmri_unusable(asru
);
568 unusable
= FMD_B_TRUE
;
570 unusable
= FMD_B_FALSE
;
578 ap
->asru_flags
|= FMD_ASRU_RECREATED
;
580 alp
->al_flags
|= FMD_ASRU_FAULTY
;
581 ap
->asru_flags
|= FMD_ASRU_FAULTY
;
584 alp
->al_flags
|= FMD_ASRU_UNUSABLE
;
585 ap
->asru_flags
|= FMD_ASRU_UNUSABLE
;
588 alp
->al_reason
= FMD_ASRU_REPLACED
;
590 alp
->al_reason
= FMD_ASRU_REPAIRED
;
592 alp
->al_reason
= FMD_ASRU_ACQUITTED
;
594 alp
->al_reason
= FMD_ASRU_REMOVED
;
596 TRACE((FMD_DBG_ASRU
, "asru %s recreated as %p (%s)", alp
->al_uuid
,
597 (void *)ap
, _fmd_asru_snames
[ap
->asru_flags
& FMD_ASRU_STATE
]));
/*
 * Set aside the cache log named 'uuid': build "<ah_dirpath>/<uuid>" and
 * "<ah_dirpath>/<uuid>-" and rename the former to the latter. If the
 * operation's result is nonzero and errno is not ENOENT, an
 * EFMD_ASRU_EVENT error naming the source path is reported.
 *
 * NOTE(review): embedded lines 607-608 and 610-612 are not visible, so how
 * the incoming 'err' argument selects between the rename and any other
 * action cannot be confirmed from here.
 */
601 fmd_asru_hash_discard(fmd_asru_hash_t
*ahp
, const char *uuid
, int err
)
603 char src
[PATH_MAX
], dst
[PATH_MAX
];
605 (void) snprintf(src
, PATH_MAX
, "%s/%s", ahp
->ah_dirpath
, uuid
);
606 (void) snprintf(dst
, PATH_MAX
, "%s/%s-", ahp
->ah_dirpath
, uuid
);
609 err
= rename(src
, dst
);
613 if (err
!= 0 && errno
!= ENOENT
)
614 fmd_error(EFMD_ASRU_EVENT
, "failed to rename log %s", src
);
618 * Open a saved log file and restore it into the ASRU hash. If we can't even
619 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If
620 * fmd_log_replay() fails, we either delete the file (if it has reached the
621 * upper limit on cache age) or rename it for debugging if it was corrupted.
/*
 * Open the saved log 'uuid' in ah_dirpath and replay it through
 * fmd_asru_hash_recreate(). The link count is sampled before the replay;
 * if it is unchanged afterwards the log is discarded with ah_error. A
 * discard with errno is also visible near the open.
 *
 * NOTE(review): embedded lines 627-629, 631-634, 636 and 638-639 are not
 * visible. They hold the declaration of 'n', the open-failure test and its
 * return, and whatever happens to 'lp' after the replay.
 */
624 fmd_asru_hash_logopen(fmd_asru_hash_t
*ahp
, const char *uuid
)
626 fmd_log_t
*lp
= fmd_log_tryopen(ahp
->ah_dirpath
, uuid
, FMD_LOG_ASRU
);
630 fmd_asru_hash_discard(ahp
, uuid
, errno
);
635 n
= ahp
->ah_al_count
;
637 fmd_log_replay(lp
, (fmd_log_f
*)fmd_asru_hash_recreate
, ahp
);
/* No new link after the replay: the log yielded nothing usable. */
640 if (ahp
->ah_al_count
== n
)
641 fmd_asru_hash_discard(ahp
, uuid
, ahp
->ah_error
);
/*
 * Scan the cache directory ah_dirpath and process each entry while holding
 * ah_lock as writer. Names beginning with '.' are skipped. Of the
 * remaining entries, some are discarded and the others are replayed with
 * fmd_asru_hash_logopen() unless their name matches "*-". An error is
 * reported if the directory cannot be opened.
 *
 * NOTE(review): embedded lines 646-650, 654-656 and 664-665 are not
 * visible. They hold the local declarations, the return after the
 * opendir() failure, and the condition that selects the discard branch.
 * The "rsrc.zero" property is read into 'zero' and presumably drives that
 * branch -- confirm.
 */
645 fmd_asru_hash_refresh(fmd_asru_hash_t
*ahp
)
651 if ((dirp
= opendir(ahp
->ah_dirpath
)) == NULL
) {
652 fmd_error(EFMD_ASRU_NODIR
,
653 "failed to open asru cache directory %s", ahp
->ah_dirpath
);
657 (void) fmd_conf_getprop(fmd
.d_conf
, "rsrc.zero", &zero
);
659 (void) pthread_rwlock_wrlock(&ahp
->ah_lock
);
661 while ((dp
= readdir(dirp
)) != NULL
) {
/* This test skips every name starting with '.', not just "." and "..". */
662 if (dp
->d_name
[0] == '.')
663 continue; /* skip "." and ".." */
666 fmd_asru_hash_discard(ahp
, dp
->d_name
, 0);
667 else if (!fmd_strmatch(dp
->d_name
, "*-"))
668 fmd_asru_hash_logopen(ahp
, dp
->d_name
);
671 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
672 (void) closedir(dirp
);
676 * If the resource is present and faulty but not unusable, replay the fault
677 * event that caused it to be marked faulty. This will cause the agent
678 * subscribing to this fault class to again disable the resource.
/*
 * Apply callback used by fmd_asru_hash_replay(). The replay happens when
 * the asru has a saved event and its flags, masked with FMD_ASRU_STATE and
 * FMD_ASRU_PRESENT, equal FMD_ASRU_FAULTY | FMD_ASRU_PRESENT (present,
 * faulty, not unusable). The event is duplicated, the owning case's uuid
 * is added under FMD_EVN_UUID, and a protocol event of the fault's class
 * is created and dispatched on fmd.d_disp.
 *
 * NOTE(review): the declarations of 'nvl', 'class' and 'e' (embedded lines
 * 683-687) are not visible. 'data' is not referenced in the visible lines.
 */
682 fmd_asru_hash_replay_asru(fmd_asru_t
*ap
, void *data
)
688 if (ap
->asru_event
!= NULL
&& (ap
->asru_flags
& (FMD_ASRU_STATE
|
689 FMD_ASRU_PRESENT
)) == (FMD_ASRU_FAULTY
| FMD_ASRU_PRESENT
)) {
691 fmd_dprintf(FMD_DBG_ASRU
,
692 "replaying fault event for %s", ap
->asru_name
);
694 (void) nvlist_xdup(ap
->asru_event
, &nvl
, &fmd
.d_nva
);
695 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
697 (void) nvlist_add_string(nvl
, FMD_EVN_UUID
,
698 ((fmd_case_impl_t
*)ap
->asru_case
)->ci_uuid
);
700 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
701 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
/*
 * Run fmd_asru_hash_replay_asru() over every asru in 'ahp' by way of
 * fmd_asru_hash_apply(), passing NULL as the callback argument.
 */
706 fmd_asru_hash_replay(fmd_asru_hash_t
*ahp
)
708 fmd_asru_hash_apply(ahp
, fmd_asru_hash_replay_asru
, NULL
);
712 * Check if the resource is still present. If not, and if the rsrc.age time
713 * has expired, then do an implicit repair on the resource.
/*
 * Link apply callback used by fmd_asru_clear_aged_rsrcs(). The visible
 * code tests FMD_ASRU_FAULTY and FMD_ASRU_PROXY on the link, then asks
 * fmd_asru_replacement_state() about the link's event:
 *  - REPLACED: repair every matching link with reason FMD_ASRU_REPLACED;
 *  - NOT_PRESENT: open the link's log and compare the seconds since its
 *    st_mtime (scaled by NANOSEC) with fmd.d_asrus->ah_lifetime; once the
 *    lifetime is reached, repair with reason FMD_ASRU_REMOVED.
 *
 * NOTE(review): embedded lines 718-723, 725, 727-729, 731-732, 734-735,
 * 745-747, 749 and 755-758 are not visible. They hold the declarations of
 * tv, lp, hrt, ps and err, the actions taken when the two flag tests
 * succeed (presumably early returns), the remaining fmd_log_open()
 * arguments and any release of 'lp'.
 */
717 fmd_asru_repair_if_aged(fmd_asru_link_t
*alp
, void *arg
)
724 fmd_asru_rep_arg_t fara
;
726 if (!(alp
->al_flags
& FMD_ASRU_FAULTY
))
730 * Checking for aged resources only happens on the diagnosing side
733 if (alp
->al_flags
& FMD_ASRU_PROXY
)
736 ps
= fmd_asru_replacement_state(alp
->al_event
, HC_ONLY_FALSE
);
737 if (ps
== FMD_OBJ_STATE_REPLACED
) {
738 fara
.fara_reason
= FMD_ASRU_REPLACED
;
739 fara
.fara_bywhat
= FARA_ALL
;
740 fara
.fara_rval
= &err
;
741 fmd_asru_repaired(alp
, &fara
);
742 } else if (ps
== FMD_OBJ_STATE_NOT_PRESENT
) {
743 fmd_time_gettimeofday(&tv
);
744 lp
= fmd_log_open(alp
->al_asru
->asru_root
, alp
->al_uuid
,
/* Age in seconds since the cache log was last modified. */
748 hrt
= (hrtime_t
)(tv
.tv_sec
- lp
->log_stat
.st_mtime
);
750 if (hrt
* NANOSEC
>= fmd
.d_asrus
->ah_lifetime
) {
751 fara
.fara_reason
= FMD_ASRU_REMOVED
;
752 fara
.fara_bywhat
= FARA_ALL
;
753 fara
.fara_rval
= &err
;
754 fmd_asru_repaired(alp
, &fara
);
/*
 * Link callback that compares the age of the link's cache log with the
 * configured lifetime: it opens the log, takes the seconds elapsed since
 * st_mtime, scales them by NANOSEC and tests whether the result is still
 * below fmd.d_asrus->ah_lifetime.
 *
 * NOTE(review): embedded lines 762-767, 774-775, 777 and 779-781 are not
 * visible. They hold the declarations, any failure handling or release of
 * 'lp', and the statement executed when the entry is not yet aged, which
 * presumably reports the result through 'arg' -- confirm.
 */
761 fmd_asru_check_if_aged(fmd_asru_link_t
*alp
, void *arg
)
768 * Case must be in resolved state for this to be called. So modified
769 * time on resource cache entry should be the time the resolve occurred.
770 * Return 0 if not yet hit rsrc.aged.
772 fmd_time_gettimeofday(&tv
);
773 lp
= fmd_log_open(alp
->al_asru
->asru_root
, alp
->al_uuid
, FMD_LOG_ASRU
);
776 hrt
= (hrtime_t
)(tv
.tv_sec
- lp
->log_stat
.st_mtime
);
778 if (hrt
* NANOSEC
< fmd
.d_asrus
->ah_lifetime
)
/*
 * Link callback that maintains a running maximum: 'arg' is treated as a
 * pointer to uint64_t and is raised to the st_mtime of this link's cache
 * log when that value is larger.
 *
 * NOTE(review): embedded lines 785-789, 793-794 and 796 are not visible.
 * They hold the declarations of 'lp' and 'hrt' and any failure handling or
 * release of 'lp'.
 */
784 fmd_asru_most_recent(fmd_asru_link_t
*alp
, void *arg
)
790 * Find most recent modified time of a set of resource cache entries.
792 lp
= fmd_log_open(alp
->al_asru
->asru_root
, alp
->al_uuid
, FMD_LOG_ASRU
);
795 hrt
= lp
->log_stat
.st_mtime
;
797 if (*(uint64_t *)arg
< hrt
)
798 *(uint64_t *)arg
= hrt
;
/*
 * Run fmd_asru_repair_if_aged() over every link in fmd.d_asrus, then run
 * fmd_case_discard_resolved() over every case in fmd.d_cases.
 *
 * NOTE(review): the final argument of the fmd_case_hash_apply() call
 * (embedded line 807) is not visible; presumably it is the address of
 * 'check_if_aged' -- confirm.
 */
802 fmd_asru_clear_aged_rsrcs()
804 int check_if_aged
= 1;
805 fmd_asru_al_hash_apply(fmd
.d_asrus
, fmd_asru_repair_if_aged
, NULL
);
806 fmd_case_hash_apply(fmd
.d_cases
, fmd_case_discard_resolved
,
/*
 * Allocate and initialise an ASRU hash.
 *
 * root, dir - joined as "<root>/<dir>" to form ah_dirpath
 *
 * The six bucket tables (asru, per-link asru, case, fru, label, rsrc) are
 * zero-filled arrays of fmd.d_str_buckets pointers. ah_lifetime comes from
 * the "rsrc.age" property, and fmd_asru_fake_not_present from
 * "fakenotpresent". The link count starts at zero and a topology hold is
 * taken.
 *
 * NOTE(review): embedded lines 814-815, 821, 823, 825, 827, 829, 836-837
 * and 839-841 are not visible. They hold the declaration of 'path', the
 * final argument of five of the allocations, any other field
 * initialisation, and the return.
 */
811 fmd_asru_hash_create(const char *root
, const char *dir
)
813 fmd_asru_hash_t
*ahp
;
816 ahp
= fmd_alloc(sizeof (fmd_asru_hash_t
), FMD_SLEEP
);
817 (void) pthread_rwlock_init(&ahp
->ah_lock
, NULL
);
818 ahp
->ah_hashlen
= fmd
.d_str_buckets
;
819 ahp
->ah_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
, FMD_SLEEP
);
820 ahp
->ah_asru_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
,
822 ahp
->ah_case_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
,
824 ahp
->ah_fru_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
,
826 ahp
->ah_label_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
,
828 ahp
->ah_rsrc_hash
= fmd_zalloc(sizeof (void *) * ahp
->ah_hashlen
,
830 (void) snprintf(path
, sizeof (path
), "%s/%s", root
, dir
);
831 ahp
->ah_dirpath
= fmd_strdup(path
, FMD_SLEEP
);
832 (void) fmd_conf_getprop(fmd
.d_conf
, "rsrc.age", &ahp
->ah_lifetime
);
/* The cast drops the volatile qualifier for the property lookup. */
833 (void) fmd_conf_getprop(fmd
.d_conf
, "fakenotpresent",
834 (uint32_t *)&fmd_asru_fake_not_present
);
835 ahp
->ah_al_count
= 0;
838 ahp
->ah_topo
= fmd_topo_hold();
/*
 * Tear down an ASRU hash. Every link on every ah_case_hash chain is
 * detached, its case reference is released, and the link is released with
 * fmd_asru_al_hash_release(). Then the directory path string, the six
 * bucket tables, the topology hold and the hash structure are freed.
 *
 * NOTE(review): embedded lines 847-848, 854 and 856-858 are not visible.
 * They hold the declaration of 'i' and one statement between the case
 * release and the link release.
 */
844 fmd_asru_hash_destroy(fmd_asru_hash_t
*ahp
)
846 fmd_asru_link_t
*alp
, *np
;
849 for (i
= 0; i
< ahp
->ah_hashlen
; i
++) {
850 for (alp
= ahp
->ah_case_hash
[i
]; alp
!= NULL
; alp
= np
) {
/* Save the successor first: the release below may free 'alp'. */
851 np
= alp
->al_case_next
;
852 alp
->al_case_next
= NULL
;
853 fmd_case_rele(alp
->al_case
);
855 fmd_asru_al_hash_release(ahp
, alp
);
859 fmd_strfree(ahp
->ah_dirpath
);
860 fmd_free(ahp
->ah_hash
, sizeof (void *) * ahp
->ah_hashlen
);
861 fmd_free(ahp
->ah_asru_hash
, sizeof (void *) * ahp
->ah_hashlen
);
862 fmd_free(ahp
->ah_case_hash
, sizeof (void *) * ahp
->ah_hashlen
);
863 fmd_free(ahp
->ah_fru_hash
, sizeof (void *) * ahp
->ah_hashlen
);
864 fmd_free(ahp
->ah_label_hash
, sizeof (void *) * ahp
->ah_hashlen
);
865 fmd_free(ahp
->ah_rsrc_hash
, sizeof (void *) * ahp
->ah_hashlen
);
866 fmd_topo_rele(ahp
->ah_topo
);
867 fmd_free(ahp
, sizeof (fmd_asru_hash_t
));
871 * Take a snapshot of the ASRU database by placing an additional hold on each
872 * member in an auxiliary array, and then call 'func' for each ASRU.
/*
 * Snapshot-and-apply over all asrus. Under ah_lock held as reader, an
 * array of ah_count pointers is allocated and filled with a held reference
 * to every asru on every ah_hash chain. After the lock is dropped, each
 * snapshot entry is tested for a non-NULL asru_fmri, then released, and
 * the array is freed.
 *
 * NOTE(review): embedded lines 879-880, 884-885, 889-890, 893, 896 and
 * 898-899 are not visible. They hold the declarations and assignment of
 * 'apc' and 'i', and the statement guarded by the asru_fmri test, which is
 * presumably the call to func(aps[i], arg) -- confirm.
 */
875 fmd_asru_hash_apply(fmd_asru_hash_t
*ahp
,
876 void (*func
)(fmd_asru_t
*, void *), void *arg
)
878 fmd_asru_t
*ap
, **aps
, **app
;
881 (void) pthread_rwlock_rdlock(&ahp
->ah_lock
);
883 aps
= app
= fmd_alloc(ahp
->ah_count
* sizeof (fmd_asru_t
*), FMD_SLEEP
);
886 for (i
= 0; i
< ahp
->ah_hashlen
; i
++) {
887 for (ap
= ahp
->ah_hash
[i
]; ap
!= NULL
; ap
= ap
->asru_next
)
888 *app
++ = fmd_asru_hold(ap
);
/* The snapshot must contain exactly 'apc' entries. */
891 ASSERT(app
== aps
+ apc
);
892 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
894 for (i
= 0; i
< apc
; i
++) {
895 if (aps
[i
]->asru_fmri
!= NULL
)
897 fmd_asru_hash_release(ahp
, aps
[i
]);
900 fmd_free(aps
, apc
* sizeof (fmd_asru_t
*));
/*
 * Snapshot-and-apply over all links. Under ah_lock held as reader, an
 * array of ah_al_count pointers is allocated and filled with a held
 * reference to every link on every ah_case_hash chain. After the lock is
 * dropped, each snapshot entry is released and the array is freed.
 *
 * NOTE(review): embedded lines 908-909, 913, 915, 920-921, 924, 926 and
 * 928-929 are not visible. They hold the declarations of 'alpc' and 'i',
 * the final fmd_alloc() argument, and the statement before each release,
 * which is presumably the call to func(alps[i], arg) -- confirm.
 */
904 fmd_asru_al_hash_apply(fmd_asru_hash_t
*ahp
,
905 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
907 fmd_asru_link_t
*alp
, **alps
, **alpp
;
910 (void) pthread_rwlock_rdlock(&ahp
->ah_lock
);
912 alps
= alpp
= fmd_alloc(ahp
->ah_al_count
* sizeof (fmd_asru_link_t
*),
914 alpc
= ahp
->ah_al_count
;
916 for (i
= 0; i
< ahp
->ah_hashlen
; i
++) {
917 for (alp
= ahp
->ah_case_hash
[i
]; alp
!= NULL
;
918 alp
= alp
->al_case_next
)
919 *alpp
++ = fmd_asru_al_hold(alp
);
/* The snapshot must contain exactly 'alpc' entries. */
922 ASSERT(alpp
== alps
+ alpc
);
923 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
925 for (i
= 0; i
< alpc
; i
++) {
927 fmd_asru_al_hash_release(ahp
, alps
[i
]);
930 fmd_free(alps
, alpc
* sizeof (fmd_asru_link_t
*));
/*
 * Generic snapshot-and-apply over the links in one bucket of 'hash' whose
 * key string matches 'name'.
 *
 * hash         - one of the per-key link bucket tables
 * match_offset - byte offset within fmd_asru_link_t of the key string
 *                pointer (read via FMD_ASRU_AL_HASH_NAME)
 * next_offset  - byte offset of the chain pointer for that table (read
 *                via FMD_ASRU_AL_HASH_NEXT)
 *
 * Under ah_lock held as reader the bucket chain is walked twice: the first
 * pass tests each entry for a match, and the second pass places a hold on
 * each match and stores it in an array of 'alpc' pointers. After the lock
 * is dropped, each stored link is released and the array is freed.
 *
 * NOTE(review): embedded lines 939-941, 943, 945, 952-953, 955, 963, 966,
 * 968 and 970-971 are not visible. They hold the declarations, the
 * statement guarded by the first-pass test (presumably the count into
 * 'alpc'), and the statement before each release, which is presumably
 * func(alps[i], arg) -- confirm.
 */
934 fmd_asru_do_hash_apply(fmd_asru_hash_t
*ahp
, const char *name
,
935 void (*func
)(fmd_asru_link_t
*, void *), void *arg
,
936 fmd_asru_link_t
**hash
, size_t match_offset
, size_t next_offset
)
938 fmd_asru_link_t
*alp
, **alps
, **alpp
;
942 (void) pthread_rwlock_rdlock(&ahp
->ah_lock
);
944 h
= fmd_asru_strhash(ahp
, name
);
/* First pass over the bucket chain. */
946 for (alp
= hash
[h
]; alp
!= NULL
; alp
=
947 /* LINTED pointer alignment */
948 FMD_ASRU_AL_HASH_NEXT(alp
, next_offset
))
949 if (fmd_asru_strcmp(ahp
,
950 /* LINTED pointer alignment */
951 FMD_ASRU_AL_HASH_NAME(alp
, match_offset
), name
))
954 alps
= alpp
= fmd_alloc(alpc
* sizeof (fmd_asru_link_t
*), FMD_SLEEP
);
/* Second pass: hold and record each matching link. */
956 for (alp
= hash
[h
]; alp
!= NULL
; alp
=
957 /* LINTED pointer alignment */
958 FMD_ASRU_AL_HASH_NEXT(alp
, next_offset
))
959 if (fmd_asru_strcmp(ahp
,
960 /* LINTED pointer alignment */
961 FMD_ASRU_AL_HASH_NAME(alp
, match_offset
), name
))
962 *alpp
++ = fmd_asru_al_hold(alp
);
964 ASSERT(alpp
== alps
+ alpc
);
965 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
967 for (i
= 0; i
< alpc
; i
++) {
969 fmd_asru_al_hash_release(ahp
, alps
[i
]);
972 fmd_free(alps
, alpc
* sizeof (fmd_asru_link_t
*));
/*
 * Apply 'func' to the links keyed by ASRU name 'name': delegates to
 * fmd_asru_do_hash_apply() with the ah_asru_hash table and the offsets of
 * al_asru_name and al_asru_next.
 */
976 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t
*ahp
, const char *name
,
977 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
979 fmd_asru_do_hash_apply(ahp
, name
, func
, arg
, ahp
->ah_asru_hash
,
980 offsetof(fmd_asru_link_t
, al_asru_name
),
981 offsetof(fmd_asru_link_t
, al_asru_next
));
/*
 * Apply 'func' to the links keyed by the uuid of case 'cp': delegates to
 * fmd_asru_do_hash_apply() with the case's ci_uuid, the ah_case_hash table
 * and the offsets of al_case_uuid and al_case_next.
 */
985 fmd_asru_hash_apply_by_case(fmd_asru_hash_t
*ahp
, fmd_case_t
*cp
,
986 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
988 fmd_asru_do_hash_apply(ahp
, ((fmd_case_impl_t
*)cp
)->ci_uuid
, func
, arg
,
989 ahp
->ah_case_hash
, offsetof(fmd_asru_link_t
, al_case_uuid
),
990 offsetof(fmd_asru_link_t
, al_case_next
));
/*
 * Apply 'func' to the links keyed by FRU name 'name': delegates to
 * fmd_asru_do_hash_apply() with the ah_fru_hash table and the offsets of
 * al_fru_name and al_fru_next.
 */
994 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t
*ahp
, const char *name
,
995 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
997 fmd_asru_do_hash_apply(ahp
, name
, func
, arg
, ahp
->ah_fru_hash
,
998 offsetof(fmd_asru_link_t
, al_fru_name
),
999 offsetof(fmd_asru_link_t
, al_fru_next
));
/*
 * Apply 'func' to the links keyed by resource name 'name': delegates to
 * fmd_asru_do_hash_apply() with the ah_rsrc_hash table and the offsets of
 * al_rsrc_name and al_rsrc_next.
 */
1003 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t
*ahp
, const char *name
,
1004 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
1006 fmd_asru_do_hash_apply(ahp
, name
, func
, arg
, ahp
->ah_rsrc_hash
,
1007 offsetof(fmd_asru_link_t
, al_rsrc_name
),
1008 offsetof(fmd_asru_link_t
, al_rsrc_next
));
/*
 * Apply 'func' to the links keyed by location label 'name': delegates to
 * fmd_asru_do_hash_apply() with the ah_label_hash table and the offsets of
 * al_label and al_label_next.
 */
1012 fmd_asru_hash_apply_by_label(fmd_asru_hash_t
*ahp
, const char *name
,
1013 void (*func
)(fmd_asru_link_t
*, void *), void *arg
)
1015 fmd_asru_do_hash_apply(ahp
, name
, func
, arg
, ahp
->ah_label_hash
,
1016 offsetof(fmd_asru_link_t
, al_label
),
1017 offsetof(fmd_asru_link_t
, al_label_next
));
1021 * Lookup an asru in the hash by name and place a hold on it. If the asru is
1022 * not found, no entry is created and NULL is returned.
/*
 * Locking wrapper around fmd_asru_hash_lookup(): takes ah_lock as reader,
 * performs the lookup of 'name', and drops the lock.
 *
 * NOTE(review): the declaration of 'ap' and the return (embedded lines
 * 1026-1028, 1032-1034) are not visible.
 */
1025 fmd_asru_hash_lookup_name(fmd_asru_hash_t
*ahp
, const char *name
)
1029 (void) pthread_rwlock_rdlock(&ahp
->ah_lock
);
1030 ap
= fmd_asru_hash_lookup(ahp
, name
);
1031 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
1037 * Create a resource cache entry using the fault event "nvl" for one of the
1038 * suspects from the case "cp".
1040 * The fault event can have the following components : FM_FAULT_ASRU,
1041 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
1042 * when calling fmd_nvl_create_fault(). In the general case, these are all
1043 * optional and an entry will always be added into the cache even if one or all
1044 * of these fields is missing.
1046 * However, for hardware faults the recommended practice is that the fault
1047 * event should always have the FM_FAULT_RESOURCE field present and that this
1048 * should be represented in hc-scheme.
1050 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
1051 * where known, though at some future stage fmd might be able to fill these
1052 * in automatically from the topology.
/*
 * Create a new resource cache entry for fault event 'nvl' of case 'cp'.
 * A fresh UUID is generated and unparsed into a zero-filled buffer of
 * uuidlen + 1 bytes, where uuidlen is the "uuidlen" property. A hold is
 * then taken on the case via fmd_case_hold_locked(), the link is created
 * with fmd_asru_al_create() using that UUID string, the creation is
 * traced, and the temporary buffer is freed.
 *
 * NOTE(review): embedded lines 1056-1059, 1061-1062, 1079 and 1081-1083
 * are not visible. They hold the declarations of 'parsed_uuid', 'uuid' and
 * 'uuidlen', and the return.
 */
1055 fmd_asru_hash_create_entry(fmd_asru_hash_t
*ahp
, fmd_case_t
*cp
, nvlist_t
*nvl
)
1060 fmd_asru_link_t
*alp
;
1063 * Generate a UUID for the ASRU. libuuid cleverly gives us no
1064 * interface for specifying or learning the buffer size. Sigh.
1065 * The spec says 36 bytes but we use a tunable just to be safe.
1067 (void) fmd_conf_getprop(fmd
.d_conf
, "uuidlen", &uuidlen
);
1068 parsed_uuid
= fmd_zalloc(uuidlen
+ 1, FMD_SLEEP
);
1069 uuid_generate(uuid
);
1070 uuid_unparse(uuid
, parsed_uuid
);
1073 * Now create the resource cache entries.
1075 fmd_case_hold_locked(cp
);
1076 alp
= fmd_asru_al_create(ahp
, nvl
, cp
, parsed_uuid
);
1077 TRACE((FMD_DBG_ASRU
, "asru %s created as %p",
1078 alp
->al_uuid
, (void *)alp
->al_asru
));
1080 fmd_free(parsed_uuid
, uuidlen
+ 1);
1086 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
1087 * We take 'ahp' for symmetry and in case we need to use it in future work.
/*
 * Drop one reference on 'ap' under its asru_lock, destroying the asru when
 * the count reaches zero. 'ahp' is not referenced in the visible lines.
 *
 * NOTE(review): embedded line 1098, between the fmd_asru_destroy() call
 * and the unlock, is not visible. Whether the unlock is skipped after the
 * asru has been destroyed cannot be determined from here -- confirm.
 */
1091 fmd_asru_hash_release(fmd_asru_hash_t
*ahp
, fmd_asru_t
*ap
)
1093 (void) pthread_mutex_lock(&ap
->asru_lock
);
1095 ASSERT(ap
->asru_refs
!= 0);
1096 if (--ap
->asru_refs
== 0)
1097 fmd_asru_destroy(ap
);
1099 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/*
 * Under ah_lock held as writer, walk one bucket chain of 'hash' (the
 * bucket is selected by hashing 'name') and act on each link whose al_case
 * equals 'cp'. The chain pointer of each link is located through
 * FMD_ASRU_AL_HASH_NEXTP with 'next_offset', and the successor is read
 * before the link is examined.
 *
 * NOTE(review): embedded lines 1105-1106, 1108, 1111 and 1117-1121 are not
 * visible. They hold the declaration of 'h', the initialisation of 'pp',
 * and the body executed for a matching link (presumably unlinking it from
 * the chain) -- confirm.
 */
1103 fmd_asru_do_delete_entry(fmd_asru_hash_t
*ahp
, fmd_case_t
*cp
,
1104 fmd_asru_link_t
**hash
, size_t next_offset
, char *name
)
1107 fmd_asru_link_t
*alp
, **pp
, *alpnext
, **alpnextp
;
1109 (void) pthread_rwlock_wrlock(&ahp
->ah_lock
);
1110 h
= fmd_asru_strhash(ahp
, name
);
1112 for (alp
= *pp
; alp
!= NULL
; alp
= alpnext
) {
1113 /* LINTED pointer alignment */
1114 alpnextp
= FMD_ASRU_AL_HASH_NEXTP(alp
, next_offset
);
1115 alpnext
= *alpnextp
;
1116 if (alp
->al_case
== cp
) {
1122 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
/*
 * Delete the hash entries of case 'cp' that are keyed by the FMRI stored
 * under 'nvname' in suspect 'cis'. When the FMRI exists and its string
 * length can be computed, a buffer of namelen + 1 bytes is allocated. If
 * the conversion into it succeeds, fmd_asru_do_delete_entry() is called
 * with that name, and the buffer is then freed. A second call with the
 * empty string "" as the name is also visible.
 *
 * NOTE(review): embedded lines 1128-1132, 1138 and 1140 are not visible.
 * They hold the declarations, the last argument of the first delete call,
 * and the construct that selects the ""-keyed call (presumably an else
 * branch for a missing or unconvertible FMRI) -- confirm.
 */
1126 fmd_asru_do_hash_delete(fmd_asru_hash_t
*ahp
, fmd_case_susp_t
*cis
,
1127 fmd_case_t
*cp
, fmd_asru_link_t
**hash
, size_t next_offset
, char *nvname
)
1133 if (nvlist_lookup_nvlist(cis
->cis_nvl
, nvname
, &nvl
) == 0 &&
1134 (namelen
= fmd_fmri_nvl2str(nvl
, NULL
, 0)) != -1 &&
1135 (name
= fmd_alloc(namelen
+ 1, FMD_SLEEP
)) != NULL
) {
1136 if (fmd_fmri_nvl2str(nvl
, name
, namelen
+ 1) != -1)
1137 fmd_asru_do_delete_entry(ahp
, cp
, hash
, next_offset
,
1139 fmd_free(name
, namelen
+ 1);
1141 fmd_asru_do_delete_entry(ahp
, cp
, hash
, next_offset
, "");
/*
 * Process all resource cache links that belong to case 'cp'.
 *
 * For every suspect of the case, the fru, rsrc and asru hashes are handled
 * through fmd_asru_do_hash_delete() and the label hash through
 * fmd_asru_do_delete_entry(), using the FM_FAULT_LOCATION string of the
 * suspect as the label key.
 *
 * The chain of ah_case_hash selected by hashing the case uuid is then
 * walked with ahp->ah_lock held for writing.  For each link whose al_case
 * is 'cp':
 *   - the link is taken off the case chain,
 *   - the case reference is dropped with fmd_case_rele_locked() and
 *     al_case is set to NULL,
 *   - the file "<ah_dirpath>/<al_uuid>" is unlinked, but only when the
 *     case has no transport (ci_xprt == NULL); a failure is reported with
 *     fmd_error(EFMD_ASRU_UNLINK, ...),
 *   - the link is removed from an asru_list with asru_lock held, and when
 *     that list becomes empty the ah_hash chain for the asru name is
 *     walked as well,
 *   - the link is passed to fmd_asru_al_hash_release().
 */
1145 fmd_asru_hash_delete_case(fmd_asru_hash_t
*ahp
, fmd_case_t
*cp
)
1147 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1148 fmd_case_susp_t
*cis
;
1149 fmd_asru_link_t
*alp
, **plp
, *alpnext
;
1151 char path
[PATH_MAX
];
1156 * first delete hash entries for each suspect
1158 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
) {
1159 fmd_asru_do_hash_delete(ahp
, cis
, cp
, ahp
->ah_fru_hash
,
1160 offsetof(fmd_asru_link_t
, al_fru_next
), FM_FAULT_FRU
);
1161 fmd_asru_do_hash_delete(ahp
, cis
, cp
, ahp
->ah_rsrc_hash
,
1162 offsetof(fmd_asru_link_t
, al_rsrc_next
), FM_FAULT_RESOURCE
);
1163 if (nvlist_lookup_string(cis
->cis_nvl
, FM_FAULT_LOCATION
,
1166 fmd_asru_do_delete_entry(ahp
, cp
, ahp
->ah_label_hash
,
1167 offsetof(fmd_asru_link_t
, al_label_next
), label
);
1168 fmd_asru_do_hash_delete(ahp
, cis
, cp
, ahp
->ah_asru_hash
,
1169 offsetof(fmd_asru_link_t
, al_asru_next
), FM_FAULT_ASRU
);
1173 * then delete associated case hash entries
1175 (void) pthread_rwlock_wrlock(&ahp
->ah_lock
);
1176 h
= fmd_asru_strhash(ahp
, cip
->ci_uuid
);
1177 plp
= &ahp
->ah_case_hash
[h
];
1178 for (alp
= *plp
; alp
!= NULL
; alp
= alpnext
) {
/* saved up front because al_case_next is reset to NULL for a match */
1179 alpnext
= alp
->al_case_next
;
1180 if (alp
->al_case
== cp
) {
1181 *plp
= alp
->al_case_next
;
1182 alp
->al_case_next
= NULL
;
1183 ASSERT(ahp
->ah_al_count
!= 0);
1187 * decrement case ref.
1189 fmd_case_rele_locked(cp
);
1190 alp
->al_case
= NULL
;
1193 * If we found a matching ASRU, unlink its log file and
1194 * then release the hash entry. Note that it may still
1195 * be referenced if another thread is manipulating it;
1196 * this is ok because once we unlink, the log file will
1197 * not be restored, and the log data will be freed when
1198 * all of the referencing threads release their
1199 * respective references.
1201 (void) snprintf(path
, sizeof (path
), "%s/%s",
1202 ahp
->ah_dirpath
, alp
->al_uuid
);
/* unlink() is only attempted when the case has no transport */
1203 if (cip
->ci_xprt
== NULL
&& unlink(path
) != 0)
1204 fmd_error(EFMD_ASRU_UNLINK
,
1205 "failed to unlink asru %s", path
);
1208 * Now unlink from the global per-resource cache
1209 * and if this is the last link then remove that from
1210 * it's own hash too.
1213 (void) pthread_mutex_lock(&ap
->asru_lock
);
1214 fmd_list_delete(&ap
->asru_list
, alp
);
1215 if (ap
->asru_list
.l_next
== NULL
) {
1217 fmd_asru_t
*ap2
, **pp
;
1218 fmd_asru_t
*apnext
, **apnextp
;
1220 ASSERT(ahp
->ah_count
!= 0);
1222 h
= fmd_asru_strhash(ahp
, ap
->asru_name
);
1223 pp
= &ahp
->ah_hash
[h
];
1224 for (ap2
= *pp
; ap2
!= NULL
; ap2
= apnext
) {
1225 apnextp
= &ap2
->asru_next
;
1234 (void) pthread_mutex_unlock(&ap
->asru_lock
);
1235 fmd_asru_al_hash_release(ahp
, alp
);
1237 plp
= &alp
->al_case_next
;
1239 (void) pthread_rwlock_unlock(&ahp
->ah_lock
);
/*
 * Members of the argument block (fmd_asru_farc_t) that
 * fmd_asru_do_repair_containees() fills in and fmd_asru_repair_containee()
 * reads.
 */
/* fmri of the containing asru; first argument to fmd_fmri_contains() */
1243 nvlist_t
*farc_parent_fmri
;
/* reason passed on to fmd_asru_clrflags() for each containee */
1244 uint8_t farc_reason
;
/*
 * Per-link callback used by fmd_asru_do_repair_containees(); 'arg' points
 * to an fmd_asru_farc_t.
 *
 * A link is acted on only when its asru is flagged FMD_ASRU_INVISIBLE, it
 * has a non-NULL al_asru_fmri, and fmd_fmri_contains() returns a positive
 * value for (farc_parent_fmri, al_asru_fmri).  For such a link
 * FMD_ASRU_FAULTY is cleared with farc_reason, and when
 * fmd_asru_clrflags() returns non-zero the case is notified with
 * fmd_case_xprt_updated() (links flagged FMD_ASRU_PROXY) or
 * fmd_case_update().
 */
1248 fmd_asru_repair_containee(fmd_asru_link_t
*alp
, void *arg
)
1250 fmd_asru_farc_t
*farcp
= (fmd_asru_farc_t
*)arg
;
1252 if ((alp
->al_asru
->asru_flags
& FMD_ASRU_INVISIBLE
) &&
1253 alp
->al_asru_fmri
&&
1254 fmd_fmri_contains(farcp
->farc_parent_fmri
, alp
->al_asru_fmri
) > 0) {
1255 if (fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
,
1256 farcp
->farc_reason
)) {
1257 if (alp
->al_flags
& FMD_ASRU_PROXY
)
1258 fmd_case_xprt_updated(alp
->al_case
);
1260 fmd_case_update(alp
->al_case
);
/*
 * Repair the containees of the asru behind link 'alp' with 'reason'.
 *
 * Nothing is done unless the link has an al_asru_fmri and is either not a
 * proxy link or a proxy link flagged FMD_ASRU_PROXY_WITH_ASRU.  The asru's
 * asru_flags are sampled under asru_lock; if neither FMD_ASRU_FAULTY nor
 * FMD_ASRU_INVISIBLE is set, fmd_asru_repair_containee() is applied through
 * fmd_asru_al_hash_apply() over fmd.d_asrus, with this link's al_asru_fmri
 * as the parent fmri.
 */
1266 fmd_asru_do_repair_containees(fmd_asru_link_t
*alp
, uint8_t reason
)
1271 * Check if all entries associated with this asru are acquitted and
1272 * if so acquit containees. Don't try to repair containees on proxy
1273 * side unless we have local asru.
1275 if (alp
->al_asru_fmri
!= NULL
&& (!(alp
->al_flags
& FMD_ASRU_PROXY
) ||
1276 (alp
->al_flags
& FMD_ASRU_PROXY_WITH_ASRU
))) {
/* the flags are copied under the lock and tested after it is dropped */
1277 (void) pthread_mutex_lock(&alp
->al_asru
->asru_lock
);
1278 flags
= alp
->al_asru
->asru_flags
;
1279 (void) pthread_mutex_unlock(&alp
->al_asru
->asru_lock
);
1280 if (!(flags
& (FMD_ASRU_FAULTY
| FMD_ASRU_INVISIBLE
))) {
1281 fmd_asru_farc_t farc
;
1283 farc
.farc_parent_fmri
= alp
->al_asru_fmri
;
1284 farc
.farc_reason
= reason
;
1285 fmd_asru_al_hash_apply(fmd
.d_asrus
,
1286 fmd_asru_repair_containee
, &farc
);
/*
 * Per-link repair callback; 'arg' points to an fmd_asru_rep_arg_t.
 *
 * The following conditions are evaluated, in this order, before the link is
 * changed:
 *   - the link is flagged FMD_ASRU_PROXY_RDONLY;
 *   - the request is FARA_BY_ASRU and the link is a proxy link without
 *     FMD_ASRU_PROXY_WITH_ASRU;
 *   - the reason is FMD_ASRU_ACQUITTED, fara_rval is non-NULL, fara_uuid is
 *     a non-empty string and differs from the link's al_case_uuid;
 *   - the reason is FMD_ASRU_REPLACED, the link is not
 *     FMD_ASRU_PROXY_EXTERNAL and fmd_asru_replacement_state() reports
 *     FMD_OBJ_STATE_STILL_PRESENT; here *fara_rval, if provided, is set to
 *     FARA_ERR_RSRCNOTR.
 *
 * FMD_ASRU_FAULTY is then cleared with fara_reason and containees are
 * handled by fmd_asru_do_repair_containees().  When fara_rval is non-NULL,
 * a value of FARA_ERR_RSRCNOTF is replaced by FARA_OK, and the case is
 * notified with fmd_case_xprt_updated() (links flagged FMD_ASRU_PROXY) or
 * fmd_case_update().
 */
1292 fmd_asru_repaired(fmd_asru_link_t
*alp
, void *arg
)
1295 fmd_asru_rep_arg_t
*farap
= (fmd_asru_rep_arg_t
*)arg
;
1298 * don't allow remote repair over readonly transport
1300 if (alp
->al_flags
& FMD_ASRU_PROXY_RDONLY
)
1304 * don't allow repair etc by asru on proxy unless asru is local
1306 if (farap
->fara_bywhat
== FARA_BY_ASRU
&&
1307 (alp
->al_flags
& FMD_ASRU_PROXY
) &&
1308 !(alp
->al_flags
& FMD_ASRU_PROXY_WITH_ASRU
))
1311 * For acquit, need to check both name and uuid if specified
1313 if (farap
->fara_reason
== FMD_ASRU_ACQUITTED
&&
1314 farap
->fara_rval
!= NULL
&& strcmp(farap
->fara_uuid
, "") != 0 &&
1315 strcmp(farap
->fara_uuid
, alp
->al_case_uuid
) != 0)
1319 * For replaced, verify it has been replaced if we have serial number.
1320 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR.
1322 if (farap
->fara_reason
== FMD_ASRU_REPLACED
&&
1323 !(alp
->al_flags
& FMD_ASRU_PROXY_EXTERNAL
) &&
1324 fmd_asru_replacement_state(alp
->al_event
,
1325 (alp
->al_flags
& FMD_ASRU_PROXY
) ? HC_ONLY_TRUE
: HC_ONLY_FALSE
) ==
1326 FMD_OBJ_STATE_STILL_PRESENT
) {
1327 if (farap
->fara_rval
)
1328 *farap
->fara_rval
= FARA_ERR_RSRCNOTR
;
1332 cleared
= fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
, farap
->fara_reason
);
1333 fmd_asru_do_repair_containees(alp
, farap
->fara_reason
);
1336 * if called from fmd_adm_*() and we really did clear the bit then
1337 * we need to do a case update to see if the associated case can be
1338 * repaired. No need to do this if called from fmd_case_*() (ie
1339 * when arg is NULL) as the case will be explicitly repaired anyway.
1341 if (farap
->fara_rval
) {
1343 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found).
1344 * If we find a valid cache entry which we repair then we
1345 * set it to FARA_OK. However we don't want to do this if
1346 * we have already set it to FARA_ERR_RSRCNOTR (not replaced)
1347 * in a previous iteration (see above). So only set it to
1348 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF.
1350 if (*farap
->fara_rval
== FARA_ERR_RSRCNOTF
)
1351 *farap
->fara_rval
= FARA_OK
;
1353 if (alp
->al_flags
& FMD_ASRU_PROXY
)
1354 fmd_case_xprt_updated(alp
->al_case
);
1356 fmd_case_update(alp
->al_case
);
1362 * Discard the case associated with this alp if it is in resolved state.
1363 * Called on "fmadm flush".
/*
 * Per-link callback: passes the link's case to fmd_case_discard_resolved().
 * 'arg' is treated as a pointer to int ('rval').  check_if_aged is a local
 * that starts at 0 and is handed to fmd_case_discard_resolved() by address.
 */
1367 fmd_asru_flush(fmd_asru_link_t
*alp
, void *arg
)
1369 int check_if_aged
= 0;
1370 int *rval
= (int *)arg
;
1373 fmd_case_discard_resolved(alp
->al_case
, &check_if_aged
);
1378 * This is only called for proxied faults. Set various flags so we can
1379 * find the nature of the transport from the resource cache code.
/*
 * Per-link callback; 'arg' points to an fmd_asru_set_on_proxy_t.
 *
 * *fasp_countp is compared with fasp_maxcount on entry and incremented at
 * the end, so it also serves as the index into fasp_proxy_asru[] for the
 * link being processed.
 *
 * Flags set here:
 *   FMD_ASRU_PROXY            link and asru, always
 *   FMD_ASRU_PROXY_EXTERNAL   link and asru, if fasp_proxy_external
 *   FMD_ASRU_PROXY_RDONLY     link only, if fasp_proxy_rdonly
 *   FMD_ASRU_PROXY_WITH_ASRU  link and asru, if fasp_proxy_asru[] is set
 *                             for this index
 */
1383 fmd_asru_set_on_proxy(fmd_asru_link_t
*alp
, void *arg
)
1385 fmd_asru_set_on_proxy_t
*entryp
= (fmd_asru_set_on_proxy_t
*)arg
;
1387 if (*entryp
->fasp_countp
>= entryp
->fasp_maxcount
)
1391 * Note that this is a proxy fault and save whetehr transport is
1392 * RDONLY or EXTERNAL.
1394 alp
->al_flags
|= FMD_ASRU_PROXY
;
1395 alp
->al_asru
->asru_flags
|= FMD_ASRU_PROXY
;
1397 if (entryp
->fasp_proxy_external
) {
1398 alp
->al_flags
|= FMD_ASRU_PROXY_EXTERNAL
;
1399 alp
->al_asru
->asru_flags
|= FMD_ASRU_PROXY_EXTERNAL
;
1402 if (entryp
->fasp_proxy_rdonly
)
1403 alp
->al_flags
|= FMD_ASRU_PROXY_RDONLY
;
1406 * Save whether asru is accessible in local domain
1408 if (entryp
->fasp_proxy_asru
[*entryp
->fasp_countp
]) {
1409 alp
->al_flags
|= FMD_ASRU_PROXY_WITH_ASRU
;
1410 alp
->al_asru
->asru_flags
|= FMD_ASRU_PROXY_WITH_ASRU
;
1412 (*entryp
->fasp_countp
)++;
/*
 * Per-link callback that runs fmd_asru_do_repair_containees() using the
 * link's own al_reason as the reason.  'arg' is not referenced.
 */
1417 fmd_asru_update_containees(fmd_asru_link_t
*alp
, void *arg
)
1419 fmd_asru_do_repair_containees(alp
, alp
->al_reason
);
1423 * This function is used for fault proxying. It updates the resource status in
1424 * the resource cache based on information that has come from the other side of
1425 * the transport. This can be called on either the proxy side or the
1426 * diagnosing side.
/*
 * Per-link callback; 'arg' points to an fmd_asru_update_status_t.
 *
 * *faus_countp is compared with faus_maxcount on entry, used as the index
 * into faus_ba[], faus_diag_asru[] and faus_proxy_asru[], and incremented
 * at the end.  'status' is the faus_ba[] entry for this link.
 *
 * Degraded/unusable: taken from 'status' only if
 *   - faus_is_proxy is set and the diag side has an asru while the proxy
 *     side has none, or
 *   - faus_is_proxy is clear and the proxy side has an asru.
 * FMD_ASRU_DEGRADED is changed directly in al_flags, FMD_ASRU_UNUSABLE goes
 * through fmd_asru_setflags() / fmd_asru_clrflags().
 *
 * Faulty: cleared via fmd_asru_clrflags() when FM_SUSPECT_FAULTY is absent,
 * with the reason chosen from the FM_SUSPECT_REPAIRED, FM_SUSPECT_REPLACED
 * and FM_SUSPECT_ACQUITTED bits; set via fmd_asru_setflags() when it is
 * present and faus_is_proxy is set.
 *
 * Present: for faus_is_proxy only, FMD_ASRU_PRESENT is updated on both the
 * link and its asru according to FM_SUSPECT_NOT_PRESENT.
 */
1429 fmd_asru_update_status(fmd_asru_link_t
*alp
, void *arg
)
1431 fmd_asru_update_status_t
*entryp
= (fmd_asru_update_status_t
*)arg
;
1434 if (*entryp
->faus_countp
>= entryp
->faus_maxcount
)
1437 status
= entryp
->faus_ba
[*entryp
->faus_countp
];
1440 * For proxy, if there is no asru on the proxy side, but there is on
1441 * the diag side, then take the diag side asru status.
1442 * For diag, if there is an asru on the proxy side, then take the proxy
1445 if (entryp
->faus_is_proxy
?
1446 (entryp
->faus_diag_asru
[*entryp
->faus_countp
] &&
1447 !entryp
->faus_proxy_asru
[*entryp
->faus_countp
]) :
1448 entryp
->faus_proxy_asru
[*entryp
->faus_countp
]) {
1449 if (status
& FM_SUSPECT_DEGRADED
)
1450 alp
->al_flags
|= FMD_ASRU_DEGRADED
;
1452 alp
->al_flags
&= ~FMD_ASRU_DEGRADED
;
1453 if (status
& FM_SUSPECT_UNUSABLE
)
1454 (void) fmd_asru_setflags(alp
, FMD_ASRU_UNUSABLE
);
1456 (void) fmd_asru_clrflags(alp
, FMD_ASRU_UNUSABLE
, 0);
1460 * Update the faulty status too.
1462 if (!(status
& FM_SUSPECT_FAULTY
))
1463 (void) fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
,
1464 (status
& FM_SUSPECT_REPAIRED
) ? FMD_ASRU_REPAIRED
:
1465 (status
& FM_SUSPECT_REPLACED
) ? FMD_ASRU_REPLACED
:
1466 (status
& FM_SUSPECT_ACQUITTED
) ? FMD_ASRU_ACQUITTED
:
1468 else if (entryp
->faus_is_proxy
)
1469 (void) fmd_asru_setflags(alp
, FMD_ASRU_FAULTY
);
1472 * for proxy only, update the present status too.
1474 if (entryp
->faus_is_proxy
) {
1475 if (!(status
& FM_SUSPECT_NOT_PRESENT
)) {
1476 alp
->al_flags
|= FMD_ASRU_PRESENT
;
1477 alp
->al_asru
->asru_flags
|= FMD_ASRU_PRESENT
;
1479 alp
->al_flags
&= ~FMD_ASRU_PRESENT
;
1480 alp
->al_asru
->asru_flags
&= ~FMD_ASRU_PRESENT
;
1483 (*entryp
->faus_countp
)++;
1487 * This function is called on the diagnosing side when fault proxying is
1488 * in use and the proxy has sent a uuclose. It updates the status of the
1489 * resource cache entries.
/*
 * Per-link callback; 'arg' points to an fmd_asru_close_status_t.
 * *facs_countp is compared with facs_maxcount on entry.  The link then has
 * FMD_ASRU_DEGRADED cleared in al_flags and FMD_ASRU_UNUSABLE set through
 * fmd_asru_setflags(), and *facs_countp is incremented.
 */
1492 fmd_asru_close_status(fmd_asru_link_t
*alp
, void *arg
)
1494 fmd_asru_close_status_t
*entryp
= (fmd_asru_close_status_t
*)arg
;
1496 if (*entryp
->facs_countp
>= entryp
->facs_maxcount
)
1498 alp
->al_flags
&= ~FMD_ASRU_DEGRADED
;
1499 (void) fmd_asru_setflags(alp
, FMD_ASRU_UNUSABLE
);
1500 (*entryp
->facs_countp
)++;
/*
 * Build an asru resource event describing the current state of link 'alp'
 * and append it to the link's log.
 *
 * The caller must hold ap->asru_lock (asserted), and the link must have a
 * case (asserted).  Cases with a transport (ci_xprt != NULL) are not logged
 * to disk.  If the link has no open log (al_log is NULL) one is opened with
 * fmd_log_open() using the asru's asru_root and the link's al_uuid.
 *
 * The event class comes from _fmd_asru_events[], indexed with 'faulty' as
 * bit 0 and 'unusable' as bit 1.  The diagnosing DE passed to
 * fmd_protocol_rsrc_asru() is ci_diag_de, or the case module's mod_fmri
 * when ci_diag_de is NULL.
 */
1504 fmd_asru_logevent(fmd_asru_link_t
*alp
)
1506 fmd_asru_t
*ap
= alp
->al_asru
;
1507 boolean_t faulty
= (alp
->al_flags
& FMD_ASRU_FAULTY
) != 0;
1508 boolean_t unusable
= (alp
->al_flags
& FMD_ASRU_UNUSABLE
) != 0;
/* 'message' is true when the asru is not flagged FMD_ASRU_INVISIBLE */
1509 boolean_t message
= (ap
->asru_flags
& FMD_ASRU_INVISIBLE
) == 0;
1510 boolean_t repaired
= (alp
->al_reason
== FMD_ASRU_REPAIRED
);
1511 boolean_t replaced
= (alp
->al_reason
== FMD_ASRU_REPLACED
);
1512 boolean_t acquitted
= (alp
->al_reason
== FMD_ASRU_ACQUITTED
);
1514 fmd_case_impl_t
*cip
;
1520 ASSERT(MUTEX_HELD(&ap
->asru_lock
));
1521 cip
= (fmd_case_impl_t
*)alp
->al_case
;
1522 ASSERT(cip
!= NULL
);
1525 * Don't log to disk on proxy side
1527 if (cip
->ci_xprt
!= NULL
)
1530 if ((lp
= alp
->al_log
) == NULL
)
1531 lp
= fmd_log_open(ap
->asru_root
, alp
->al_uuid
, FMD_LOG_ASRU
);
1534 return; /* can't log events if we can't open the log */
1536 nvl
= fmd_protocol_rsrc_asru(_fmd_asru_events
[faulty
| (unusable
<< 1)],
1537 alp
->al_asru_fmri
, cip
->ci_uuid
, cip
->ci_code
, faulty
, unusable
,
1538 message
, alp
->al_event
, &cip
->ci_tv
, repaired
, replaced
, acquitted
,
1539 cip
->ci_state
== FMD_CASE_RESOLVED
, cip
->ci_diag_de
== NULL
?
1540 cip
->ci_mod
->mod_fmri
: cip
->ci_diag_de
, cip
->ci_injected
== 1);
/* the class string is read back from the event that was just built */
1542 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
1543 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
1546 fmd_log_append(lp
, e
, NULL
);
1550 * For now, we close the log file after every update to conserve file
1551 * descriptors and daemon overhead. If this becomes a performance
1552 * issue this code can change to keep a fixed-size LRU cache of logs.
/*
 * Set state flag 'sflag' on link 'alp', with the asru's asru_lock held.
 *
 * 'sflag' must lie within FMD_ASRU_STATE and must not be the whole mask
 * (both asserted).  The link's state bits are compared before and after
 * the flag is or'ed in.  If they are unchanged the lock is dropped without
 * further action.  Otherwise the flag is also set in the asru's
 * asru_flags, the transition is traced using _fmd_asru_snames[], an event
 * is logged with fmd_asru_logevent(), and asru_cv is broadcast before the
 * lock is released.
 */
1559 fmd_asru_setflags(fmd_asru_link_t
*alp
, uint_t sflag
)
1561 fmd_asru_t
*ap
= alp
->al_asru
;
1562 uint_t nstate
, ostate
;
1564 ASSERT(!(sflag
& ~FMD_ASRU_STATE
));
1565 ASSERT(sflag
!= FMD_ASRU_STATE
);
1567 (void) pthread_mutex_lock(&ap
->asru_lock
);
1569 ostate
= alp
->al_flags
& FMD_ASRU_STATE
;
1570 alp
->al_flags
|= sflag
;
1571 nstate
= alp
->al_flags
& FMD_ASRU_STATE
;
/* no state change: nothing to propagate or log */
1573 if (nstate
== ostate
) {
1574 (void) pthread_mutex_unlock(&ap
->asru_lock
);
1578 ap
->asru_flags
|= sflag
;
1579 TRACE((FMD_DBG_ASRU
, "asru %s %s->%s", alp
->al_uuid
,
1580 _fmd_asru_snames
[ostate
], _fmd_asru_snames
[nstate
]));
1582 fmd_asru_logevent(alp
);
1584 (void) pthread_cond_broadcast(&ap
->asru_cv
);
1585 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/*
 * Clear state flag 'sflag' on link 'alp' and record 'reason', with the
 * asru's asru_lock held.
 *
 * 'sflag' must lie within FMD_ASRU_STATE and must not be the whole mask
 * (both asserted).  The link's state bits are compared before and after
 * the flag is cleared.
 *
 * State unchanged: al_reason is raised to 'reason' only if 'reason' is
 * greater than the current al_reason and the case state is below
 * FMD_CASE_REPAIRED; in that case an event is logged and asru_cv is
 * broadcast.  The lock is then released.
 *
 * State changed: al_reason is raised to 'reason' if greater.  For
 * FMD_ASRU_UNUSABLE the flag is cleared in the asru's asru_flags directly.
 * For FMD_ASRU_FAULTY the al_flags of every link on asru_list are or'ed
 * together and the asru's flag is cleared only if none of them still has
 * FMD_ASRU_FAULTY.  The transition is traced, logged with
 * fmd_asru_logevent() and asru_cv is broadcast before the lock is released.
 */
1590 fmd_asru_clrflags(fmd_asru_link_t
*alp
, uint_t sflag
, uint8_t reason
)
1592 fmd_asru_t
*ap
= alp
->al_asru
;
1593 fmd_asru_link_t
*nalp
;
1594 uint_t nstate
, ostate
, flags
= 0;
1596 ASSERT(!(sflag
& ~FMD_ASRU_STATE
));
1597 ASSERT(sflag
!= FMD_ASRU_STATE
);
1599 (void) pthread_mutex_lock(&ap
->asru_lock
);
1601 ostate
= alp
->al_flags
& FMD_ASRU_STATE
;
1602 alp
->al_flags
&= ~sflag
;
1603 nstate
= alp
->al_flags
& FMD_ASRU_STATE
;
1605 if (nstate
== ostate
) {
1606 if (reason
> alp
->al_reason
&&
1607 ((fmd_case_impl_t
*)alp
->al_case
)->ci_state
<
1608 FMD_CASE_REPAIRED
) {
1609 alp
->al_reason
= reason
;
1610 fmd_asru_logevent(alp
);
1611 (void) pthread_cond_broadcast(&ap
->asru_cv
);
1613 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/* al_reason is only ever raised here, never lowered */
1616 if (reason
> alp
->al_reason
)
1617 alp
->al_reason
= reason
;
1619 if (sflag
== FMD_ASRU_UNUSABLE
)
1620 ap
->asru_flags
&= ~sflag
;
1621 else if (sflag
== FMD_ASRU_FAULTY
) {
1623 * only clear the faulty bit if all links are clear
1625 for (nalp
= fmd_list_next(&ap
->asru_list
); nalp
!= NULL
;
1626 nalp
= fmd_list_next(nalp
))
1627 flags
|= nalp
->al_flags
;
1628 if (!(flags
& FMD_ASRU_FAULTY
))
1629 ap
->asru_flags
&= ~sflag
;
1632 TRACE((FMD_DBG_ASRU
, "asru %s %s->%s", alp
->al_uuid
,
1633 _fmd_asru_snames
[ostate
], _fmd_asru_snames
[nstate
]));
1635 fmd_asru_logevent(alp
);
1637 (void) pthread_cond_broadcast(&ap
->asru_cv
);
1638 (void) pthread_mutex_unlock(&ap
->asru_lock
);
/*
 * Per-link callback: with the asru's asru_lock held, log an event for the
 * link via fmd_asru_logevent() and broadcast asru_cv.  The second argument
 * is not referenced.
 */
1645 fmd_asru_log_resolved(fmd_asru_link_t
*alp
, void *unused
)
1647 fmd_asru_t
*ap
= alp
->al_asru
;
1649 (void) pthread_mutex_lock(&ap
->asru_lock
);
1650 fmd_asru_logevent(alp
);
1651 (void) pthread_cond_broadcast(&ap
->asru_cv
);
1652 (void) pthread_mutex_unlock(&ap
->asru_lock
);
1656 * Report the current known state of the link entry (ie this particular fault
1657 * affecting this particular ASRU).
/*
 * The reported state 'st' starts from the link's FMD_ASRU_FAULTY and
 * FMD_ASRU_UNUSABLE bits.
 *
 * Presence: unless the link is flagged FMD_ASRU_PROXY_EXTERNAL,
 * fmd_asru_replacement_state() is consulted (hc-only when the link is a
 * proxy link).  FMD_OBJ_STATE_NOT_PRESENT and FMD_OBJ_STATE_REPLACED both
 * return early with FMD_ASRU_UNUSABLE or'ed into 'st'; for REPLACED the
 * link's al_reason is also raised to at least FMD_ASRU_REPLACED, so this
 * function can modify the link.  If the presence state is still
 * FMD_OBJ_STATE_UNKNOWN on a proxy link, the link's stored
 * FMD_ASRU_DEGRADED and FMD_ASRU_PRESENT bits are used.
 *
 * Service state: for non-proxy links and proxy links with
 * FMD_ASRU_PROXY_WITH_ASRU, when the event carries an FM_FAULT_ASRU nvlist,
 * fmd_fmri_service_state() is tried first and fmd_fmri_unusable() is used
 * if that returns -1 or FMD_SERVICE_STATE_UNKNOWN.  The result adjusts the
 * FMD_ASRU_UNUSABLE and FMD_ASRU_DEGRADED bits of 'st'.
 */
1660 fmd_asru_al_getstate(fmd_asru_link_t
*alp
)
1662 int us
, st
= (alp
->al_flags
& (FMD_ASRU_FAULTY
| FMD_ASRU_UNUSABLE
));
1664 int ps
= FMD_OBJ_STATE_UNKNOWN
;
1667 * For fault proxying with an EXTERNAL transport, believe the presence
1668 * state as sent by the diagnosing side. Otherwise find the presence
1669 * state here. Note that if fault proxying with an INTERNAL transport
1670 * we can only trust the presence state where we are using hc-scheme
1671 * fmris which should be consistant across domains in the same system -
1672 * other schemes can refer to different devices in different domains.
1674 if (!(alp
->al_flags
& FMD_ASRU_PROXY_EXTERNAL
)) {
1675 ps
= fmd_asru_replacement_state(alp
->al_event
, (alp
->al_flags
&
1676 FMD_ASRU_PROXY
)? HC_ONLY_TRUE
: HC_ONLY_FALSE
);
1677 if (ps
== FMD_OBJ_STATE_NOT_PRESENT
)
1678 return (st
| FMD_ASRU_UNUSABLE
);
1679 if (ps
== FMD_OBJ_STATE_REPLACED
) {
1680 if (alp
->al_reason
< FMD_ASRU_REPLACED
)
1681 alp
->al_reason
= FMD_ASRU_REPLACED
;
1682 return (st
| FMD_ASRU_UNUSABLE
);
1685 if (ps
== FMD_OBJ_STATE_UNKNOWN
&& (alp
->al_flags
& FMD_ASRU_PROXY
))
1686 st
|= (alp
->al_flags
& (FMD_ASRU_DEGRADED
| FMD_ASRU_PRESENT
));
1688 st
|= (alp
->al_flags
& (FMD_ASRU_DEGRADED
)) | FMD_ASRU_PRESENT
;
1691 * For fault proxying, unless we have a local ASRU, then believe the
1692 * service state sent by the diagnosing side. Otherwise find the service
1693 * state here. Try fmd_fmri_service_state() first, but if that's not
1694 * supported by the scheme then fall back to fmd_fmri_unusable().
1696 if ((!(alp
->al_flags
& FMD_ASRU_PROXY
) ||
1697 (alp
->al_flags
& FMD_ASRU_PROXY_WITH_ASRU
)) &&
1698 nvlist_lookup_nvlist(alp
->al_event
, FM_FAULT_ASRU
, &asru
) == 0) {
1699 us
= fmd_fmri_service_state(asru
);
1700 if (us
== -1 || us
== FMD_SERVICE_STATE_UNKNOWN
) {
1701 /* not supported by scheme - try fmd_fmri_unusable */
1702 us
= fmd_fmri_unusable(asru
);
1704 st
|= FMD_ASRU_UNUSABLE
;
1706 st
&= ~FMD_ASRU_UNUSABLE
;
1708 if (us
== FMD_SERVICE_STATE_UNUSABLE
) {
1709 st
&= ~FMD_ASRU_DEGRADED
;
1710 st
|= FMD_ASRU_UNUSABLE
;
1711 } else if (us
== FMD_SERVICE_STATE_OK
) {
1712 st
&= ~(FMD_ASRU_DEGRADED
| FMD_ASRU_UNUSABLE
);
/* ISOLATE_PENDING is folded in exactly like FMD_SERVICE_STATE_OK */
1713 } else if (us
== FMD_SERVICE_STATE_ISOLATE_PENDING
) {
1714 st
&= ~(FMD_ASRU_DEGRADED
| FMD_ASRU_UNUSABLE
);
1715 } else if (us
== FMD_SERVICE_STATE_DEGRADED
) {
1716 st
&= ~FMD_ASRU_UNUSABLE
;
1717 st
|= FMD_ASRU_DEGRADED
;
1725 * Report the current known state of the ASRU by refreshing its unusable status
1726 * based upon the routines provided by the scheme module. If the unusable bit
1727 * is different, we do *not* generate a state change here because that change
1728 * may be unrelated to fmd activities and therefore we have no case or event.
1729 * The absence of the transition is harmless as this function is only provided
1730 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
/*
 * Presence is only examined for asrus that are not flagged
 * FMD_ASRU_INTERNAL.  fmd_fmri_present() is consulted for non-proxy asrus,
 * and for proxy asrus only when the transport is not external and the fmri
 * scheme is FM_FMRI_SCHEME_HC.  The fmd_asru_fake_not_present variable is
 * compared against FMD_OBJ_STATE_REPLACED ahead of that call.
 *
 * The state 'st' is built from the FMD_ASRU_FAULTY and FMD_ASRU_UNUSABLE
 * bits of asru_flags.  For non-proxy asrus, and proxy asrus flagged
 * FMD_ASRU_PROXY_WITH_ASRU, the unusable bit is refreshed from
 * fmd_fmri_unusable().
 */
1733 fmd_asru_getstate(fmd_asru_t
*ap
)
1738 /* do not report non-fmd non-present resources */
1739 if (!(ap
->asru_flags
& FMD_ASRU_INTERNAL
)) {
1741 * As with fmd_asru_al_getstate(), we can only trust the
1742 * local presence state on a proxy if the transport is
1743 * internal and the scheme is hc. Otherwise we believe the
1744 * state as sent by the diagnosing side.
1746 if (!(ap
->asru_flags
& FMD_ASRU_PROXY
) ||
1747 (!(ap
->asru_flags
& FMD_ASRU_PROXY_EXTERNAL
) &&
1748 (nvlist_lookup_string(ap
->asru_fmri
, FM_FMRI_SCHEME
,
1749 &s
) == 0 && strcmp(s
, FM_FMRI_SCHEME_HC
) == 0))) {
1750 if (fmd_asru_fake_not_present
>=
1751 FMD_OBJ_STATE_REPLACED
)
1753 p
= fmd_fmri_present(ap
->asru_fmri
);
/*
 * NOTE(review): '&&' binds tighter than '||', so the inner expression
 * groups as (p < 0 && !PROXY) || !PRESENT.  Confirm that this is the
 * intended grouping and add explicit parentheses either way.
 */
1755 if (p
== 0 || (p
< 0 && !(ap
->asru_flags
& FMD_ASRU_PROXY
) ||
1756 !(ap
->asru_flags
& FMD_ASRU_PRESENT
)))
1761 * As with fmd_asru_al_getstate(), we can only trust the local unusable
1762 * state on a proxy if there is a local ASRU.
1764 st
= ap
->asru_flags
& (FMD_ASRU_FAULTY
| FMD_ASRU_UNUSABLE
);
1765 if (!(ap
->asru_flags
& FMD_ASRU_PROXY
) ||
1766 (ap
->asru_flags
& FMD_ASRU_PROXY_WITH_ASRU
)) {
1767 us
= fmd_fmri_unusable(ap
->asru_fmri
);
1769 st
|= FMD_ASRU_UNUSABLE
;
1771 st
&= ~FMD_ASRU_UNUSABLE
;