4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
29 * Diagnosis engines are expected to group telemetry events related to the
30 * diagnosis of a particular problem on the system into a set of cases. The
31 * diagnosis engine may have any number of cases open at a given point in time.
32 * Some cases may eventually be *solved* by associating a suspect list of one
33 * or more problems with the case, at which point fmd publishes a list.suspect
34 * event for the case and it becomes visible to administrators and agents.
36 * Every case is named using a UUID, and is globally visible in the case hash.
37 * Cases are reference-counted, except for the reference from the case hash
38 * itself. Consumers of case references include modules, which store active
39 * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
41 * Cases obey the following state machine. In states UNSOLVED, SOLVED, and
42 * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
43 * or transport) and the case is referenced by the mod_cases list. Once the
44 * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
45 * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
48 * +----------| UNSOLVED |
62 * +-----------+ | +------------+
65 * discard | CLOSED | 6 |
82 * The state machine changes are triggered by calls to fmd_case_transition()
83 * from various locations inside of fmd, as described below:
85 * [1] Called by: fmd_case_solve()
86 * Actions: FMD_CF_SOLVED flag is set in ci_flags
87 * conviction policy is applied to suspect list
88 * suspects convicted are marked faulty (F) in R$
89 * list.suspect event logged and dispatched
91 * [2] Called by: fmd_case_close(), fmd_case_uuclose()
92 * Actions: diagnosis engine fmdo_close() entry point scheduled
93 * case discarded upon exit from CLOSE_WAIT
95 * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
96 * Actions: FMD_CF_ISOLATED flag is set in ci_flags
97 * suspects convicted (F) are marked unusable (U) in R$
98 * diagnosis engine fmdo_close() entry point scheduled
99 * case transitions to CLOSED [4] upon exit from CLOSE_WAIT
101 * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
102 * Actions: list.isolated event dispatched
103 * case deleted from module's list of open cases
105 * [5] Called by: fmd_case_repair(), fmd_case_update()
106 * Actions: FMD_CF_REPAIR flag is set in ci_flags
107 * diagnosis engine fmdo_close() entry point scheduled
108 * case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
110 * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
111 * Actions: suspects convicted are marked non faulty (!F) in R$
112 * list.repaired or list.updated event dispatched
114 * [7] Called by: fmd_case_repair(), fmd_case_update()
115 * Actions: FMD_CF_REPAIR flag is set in ci_flags
116 * suspects convicted are marked non faulty (!F) in R$
117 * list.repaired or list.updated event dispatched
119 * [8] Called by: fmd_case_uuresolve()
120 * Actions: list.resolved event dispatched
124 #include <sys/fm/protocol.h>
125 #include <uuid/uuid.h>
128 #include <fmd_alloc.h>
129 #include <fmd_module.h>
130 #include <fmd_error.h>
131 #include <fmd_conf.h>
132 #include <fmd_case.h>
133 #include <fmd_string.h>
134 #include <fmd_subr.h>
135 #include <fmd_protocol.h>
136 #include <fmd_event.h>
137 #include <fmd_eventq.h>
138 #include <fmd_dispq.h>
141 #include <fmd_asru.h>
142 #include <fmd_fmri.h>
143 #include <fmd_xprt.h>
147 static const char *const _fmd_case_snames
[] = {
148 "UNSOLVED", /* FMD_CASE_UNSOLVED */
149 "SOLVED", /* FMD_CASE_SOLVED */
150 "CLOSE_WAIT", /* FMD_CASE_CLOSE_WAIT */
151 "CLOSED", /* FMD_CASE_CLOSED */
152 "REPAIRED", /* FMD_CASE_REPAIRED */
153 "RESOLVED" /* FMD_CASE_RESOLVED */
156 static fmd_case_impl_t
*fmd_case_tryhold(fmd_case_impl_t
*);
159 fmd_case_hash_create(void)
161 fmd_case_hash_t
*chp
= fmd_alloc(sizeof (fmd_case_hash_t
), FMD_SLEEP
);
163 (void) pthread_rwlock_init(&chp
->ch_lock
, NULL
);
164 chp
->ch_hashlen
= fmd
.d_str_buckets
;
165 chp
->ch_hash
= fmd_zalloc(sizeof (void *) * chp
->ch_hashlen
, FMD_SLEEP
);
166 chp
->ch_code_hash
= fmd_zalloc(sizeof (void *) * chp
->ch_hashlen
,
174 * Destroy the case hash. Unlike most of our hash tables, no active references
175 * are kept by the case hash itself; all references come from other subsystems.
176 * The hash must be destroyed after all modules are unloaded; if anything was
177 * present in the hash it would be by definition a reference count leak.
180 fmd_case_hash_destroy(fmd_case_hash_t
*chp
)
182 fmd_free(chp
->ch_hash
, sizeof (void *) * chp
->ch_hashlen
);
183 fmd_free(chp
->ch_code_hash
, sizeof (void *) * chp
->ch_hashlen
);
184 fmd_free(chp
, sizeof (fmd_case_hash_t
));
188 * Take a snapshot of the case hash by placing an additional hold on each
189 * member in an auxiliary array, and then call 'func' for each case.
192 fmd_case_hash_apply(fmd_case_hash_t
*chp
,
193 void (*func
)(fmd_case_t
*, void *), void *arg
)
195 fmd_case_impl_t
*cp
, **cps
, **cpp
;
198 (void) pthread_rwlock_rdlock(&chp
->ch_lock
);
200 cps
= cpp
= fmd_alloc(chp
->ch_count
* sizeof (fmd_case_t
*), FMD_SLEEP
);
203 for (i
= 0; i
< chp
->ch_hashlen
; i
++) {
204 for (cp
= chp
->ch_hash
[i
]; cp
!= NULL
; cp
= cp
->ci_next
)
205 *cpp
++ = fmd_case_tryhold(cp
);
208 ASSERT(cpp
== cps
+ cpc
);
209 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
211 for (i
= 0; i
< cpc
; i
++) {
212 if (cps
[i
] != NULL
) {
213 func((fmd_case_t
*)cps
[i
], arg
);
214 fmd_case_rele((fmd_case_t
*)cps
[i
]);
218 fmd_free(cps
, cpc
* sizeof (fmd_case_t
*));
222 fmd_case_code_hash_insert(fmd_case_hash_t
*chp
, fmd_case_impl_t
*cip
)
224 uint_t h
= fmd_strhash(cip
->ci_code
) % chp
->ch_hashlen
;
226 cip
->ci_code_next
= chp
->ch_code_hash
[h
];
227 chp
->ch_code_hash
[h
] = cip
;
231 fmd_case_code_hash_delete(fmd_case_hash_t
*chp
, fmd_case_impl_t
*cip
)
233 fmd_case_impl_t
**pp
, *cp
;
236 uint_t h
= fmd_strhash(cip
->ci_code
) % chp
->ch_hashlen
;
238 pp
= &chp
->ch_code_hash
[h
];
239 for (cp
= *pp
; cp
!= NULL
; cp
= cp
->ci_code_next
) {
241 pp
= &cp
->ci_code_next
;
246 *pp
= cp
->ci_code_next
;
247 cp
->ci_code_next
= NULL
;
253 * Look up the diagcode for this case and cache it in ci_code. If no suspects
254 * were defined for this case or if the lookup fails, the event dictionary or
255 * module code is broken, and we set the event code to a precomputed default.
258 fmd_case_mkcode(fmd_case_t
*cp
)
260 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
261 fmd_case_susp_t
*cis
;
262 fmd_case_hash_t
*chp
= fmd
.d_cases
;
267 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
268 ASSERT(cip
->ci_state
>= FMD_CASE_SOLVED
);
271 * delete any existing entry from code hash if it is on it
273 fmd_case_code_hash_delete(chp
, cip
);
275 fmd_free(cip
->ci_code
, cip
->ci_codelen
);
276 cip
->ci_codelen
= cip
->ci_mod
->mod_codelen
;
277 cip
->ci_code
= fmd_zalloc(cip
->ci_codelen
, FMD_SLEEP
);
278 keys
= keyp
= alloca(sizeof (char *) * (cip
->ci_nsuspects
+ 1));
280 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
) {
281 if (nvlist_lookup_string(cis
->cis_nvl
, FM_CLASS
, keyp
) == 0)
285 *keyp
= NULL
; /* mark end of keys[] array for libdiagcode */
287 if (cip
->ci_nsuspects
== 0 || fmd_module_dc_key2code(
288 cip
->ci_mod
, keys
, cip
->ci_code
, cip
->ci_codelen
) != 0) {
289 (void) fmd_conf_getprop(fmd
.d_conf
, "nodiagcode", &s
);
290 fmd_free(cip
->ci_code
, cip
->ci_codelen
);
291 cip
->ci_codelen
= strlen(s
) + 1;
292 cip
->ci_code
= fmd_zalloc(cip
->ci_codelen
, FMD_SLEEP
);
293 (void) strcpy(cip
->ci_code
, s
);
297 * add into hash of solved cases
299 fmd_case_code_hash_insert(chp
, cip
);
301 return (cip
->ci_code
);
313 fmd_case_set_lst(fmd_asru_link_t
*alp
, void *arg
)
315 fmd_case_lst_t
*entryp
= (fmd_case_lst_t
*)arg
;
319 if (*entryp
->fcl_countp
>= entryp
->fcl_maxcount
)
321 if (nvlist_lookup_boolean_value(alp
->al_event
, FM_SUSPECT_MESSAGE
,
322 &b
) == 0 && b
== B_FALSE
)
323 *entryp
->fcl_msgp
= B_FALSE
;
324 entryp
->fcl_ba
[*entryp
->fcl_countp
] = 0;
325 state
= fmd_asru_al_getstate(alp
);
326 if (state
& FMD_ASRU_DEGRADED
)
327 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_DEGRADED
;
328 if (state
& FMD_ASRU_UNUSABLE
)
329 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_UNUSABLE
;
330 if (state
& FMD_ASRU_FAULTY
)
331 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_FAULTY
;
332 if (!(state
& FMD_ASRU_PRESENT
))
333 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_NOT_PRESENT
;
334 if (alp
->al_reason
== FMD_ASRU_REPAIRED
)
335 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_REPAIRED
;
336 else if (alp
->al_reason
== FMD_ASRU_REPLACED
)
337 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_REPLACED
;
338 else if (alp
->al_reason
== FMD_ASRU_ACQUITTED
)
339 entryp
->fcl_ba
[*entryp
->fcl_countp
] |= FM_SUSPECT_ACQUITTED
;
340 entryp
->fcl_nva
[*entryp
->fcl_countp
] = alp
->al_event
;
341 (*entryp
->fcl_countp
)++;
345 fmd_case_faulty(fmd_asru_link_t
*alp
, void *arg
)
347 int *faultyp
= (int *)arg
;
349 *faultyp
|= (alp
->al_flags
& FMD_ASRU_FAULTY
);
353 fmd_case_usable(fmd_asru_link_t
*alp
, void *arg
)
355 int *usablep
= (int *)arg
;
357 *usablep
|= !(fmd_asru_al_getstate(alp
) & FMD_ASRU_UNUSABLE
);
361 fmd_case_not_faulty(fmd_asru_link_t
*alp
, void *arg
)
363 int *not_faultyp
= (int *)arg
;
365 *not_faultyp
|= !(alp
->al_flags
& FMD_ASRU_FAULTY
);
369 * Have we got any suspects with an asru that are still unusable and present?
372 fmd_case_unusable_and_present(fmd_asru_link_t
*alp
, void *arg
)
374 int *rvalp
= (int *)arg
;
379 * if this is a proxy case and this suspect doesn't have a local asru
380 * then state is unknown so we must assume it may still be unusable.
382 if ((alp
->al_flags
& FMD_ASRU_PROXY
) &&
383 !(alp
->al_flags
& FMD_ASRU_PROXY_WITH_ASRU
)) {
388 state
= fmd_asru_al_getstate(alp
);
389 if (nvlist_lookup_nvlist(alp
->al_event
, FM_FAULT_ASRU
, &asru
) != 0)
391 *rvalp
|= ((state
& FMD_ASRU_UNUSABLE
) && (state
& FMD_ASRU_PRESENT
));
395 fmd_case_mkevent(fmd_case_t
*cp
, const char *class)
397 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
398 nvlist_t
**nva
, *nvl
;
405 (void) pthread_mutex_lock(&cip
->ci_lock
);
406 ASSERT(cip
->ci_state
>= FMD_CASE_SOLVED
);
408 nva
= alloca(sizeof (nvlist_t
*) * cip
->ci_nsuspects
);
409 ba
= alloca(sizeof (uint8_t) * cip
->ci_nsuspects
);
412 * For each suspect associated with the case, store its fault event
413 * nvlist in 'nva'. We also look to see if any of the suspect faults
414 * have asked not to be messaged. If any of them have made such a
415 * request, propagate that attribute to the composite list.* event.
416 * Finally, store each suspect's faulty status into the bitmap 'ba'.
418 fcl
.fcl_countp
= &count
;
419 fcl
.fcl_maxcount
= cip
->ci_nsuspects
;
423 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_set_lst
, &fcl
);
425 if (cip
->ci_code
== NULL
)
426 (void) fmd_case_mkcode(cp
);
428 * For repair and updated event, we lookup diagcode from dict using key
429 * "list.repaired" or "list.updated" or "list.resolved".
431 if (strcmp(class, FM_LIST_REPAIRED_CLASS
) == 0)
432 (void) fmd_conf_getprop(fmd
.d_conf
, "repaircode", &code
);
433 else if (strcmp(class, FM_LIST_RESOLVED_CLASS
) == 0)
434 (void) fmd_conf_getprop(fmd
.d_conf
, "resolvecode", &code
);
435 else if (strcmp(class, FM_LIST_UPDATED_CLASS
) == 0)
436 (void) fmd_conf_getprop(fmd
.d_conf
, "updatecode", &code
);
441 cip
->ci_flags
|= FMD_CF_INVISIBLE
;
444 * Use the ci_diag_de if one has been saved (eg for an injected fault).
445 * Otherwise use the authority for the current module.
447 nvl
= fmd_protocol_list(class, cip
->ci_diag_de
== NULL
?
448 cip
->ci_mod
->mod_fmri
: cip
->ci_diag_de
, cip
->ci_uuid
, code
, count
,
449 nva
, ba
, msg
, &cip
->ci_tv
, cip
->ci_injected
);
451 (void) pthread_mutex_unlock(&cip
->ci_lock
);
455 static int fmd_case_match_on_faulty_overlap
= 1;
456 static int fmd_case_match_on_acquit_overlap
= 1;
457 static int fmd_case_auto_acquit_isolated
= 1;
458 static int fmd_case_auto_acquit_non_acquitted
= 1;
459 static int fmd_case_too_recent
= 10; /* time in seconds */
462 fmd_case_compare_elem(nvlist_t
*nvl
, nvlist_t
*xnvl
, const char *elem
)
466 char *new_name
= NULL
;
470 int fmri_present
= 1;
471 int new_fmri_present
= 1;
473 fmd_topo_t
*ftp
= fmd_topo_hold();
475 if (nvlist_lookup_nvlist(xnvl
, elem
, &rsrc
) != 0)
478 if ((namelen
= fmd_fmri_nvl2str(rsrc
, NULL
, 0)) == -1)
480 name
= fmd_alloc(namelen
+ 1, FMD_SLEEP
);
481 if (fmd_fmri_nvl2str(rsrc
, name
, namelen
+ 1) == -1)
484 if (nvlist_lookup_nvlist(nvl
, elem
, &new_rsrc
) != 0)
485 new_fmri_present
= 0;
487 if ((new_namelen
= fmd_fmri_nvl2str(new_rsrc
, NULL
, 0)) == -1)
489 new_name
= fmd_alloc(new_namelen
+ 1, FMD_SLEEP
);
490 if (fmd_fmri_nvl2str(new_rsrc
, new_name
, new_namelen
+ 1) == -1)
493 match
= (fmri_present
== new_fmri_present
&&
494 (fmri_present
== 0 ||
495 topo_fmri_strcmp(ftp
->ft_hdl
, name
, new_name
)));
498 fmd_free(name
, namelen
+ 1);
499 if (new_name
!= NULL
)
500 fmd_free(new_name
, new_namelen
+ 1);
506 fmd_case_match_suspect(nvlist_t
*nvl1
, nvlist_t
*nvl2
)
508 char *class, *new_class
;
510 if (!fmd_case_compare_elem(nvl1
, nvl2
, FM_FAULT_ASRU
))
512 if (!fmd_case_compare_elem(nvl1
, nvl2
, FM_FAULT_RESOURCE
))
514 if (!fmd_case_compare_elem(nvl1
, nvl2
, FM_FAULT_FRU
))
516 (void) nvlist_lookup_string(nvl2
, FM_CLASS
, &class);
517 (void) nvlist_lookup_string(nvl1
, FM_CLASS
, &new_class
);
518 return (strcmp(class, new_class
) == 0);
524 fmd_case_impl_t
*fcms_cip
;
525 uint8_t *fcms_new_susp_state
;
526 uint8_t *fcms_old_susp_state
;
527 uint8_t *fcms_old_match_state
;
529 #define SUSPECT_STATE_FAULTY 0x1
530 #define SUSPECT_STATE_ISOLATED 0x2
531 #define SUSPECT_STATE_REMOVED 0x4
532 #define SUSPECT_STATE_ACQUITED 0x8
533 #define SUSPECT_STATE_REPAIRED 0x10
534 #define SUSPECT_STATE_REPLACED 0x20
535 #define SUSPECT_STATE_NO_MATCH 0x1
538 * This is called for each suspect in the old case. Compare it against each
539 * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
540 * as appropriate. fcms_new_susp_state will be left as 0 if the suspect is not
541 * found in the old case.
544 fmd_case_match_suspects(fmd_asru_link_t
*alp
, void *arg
)
546 fcms_t
*fcmsp
= (fcms_t
*)arg
;
547 fmd_case_impl_t
*cip
= fcmsp
->fcms_cip
;
548 fmd_case_susp_t
*cis
;
550 int state
= fmd_asru_al_getstate(alp
);
552 if (*fcmsp
->fcms_countp
>= fcmsp
->fcms_maxcount
)
555 if (!(state
& FMD_ASRU_PRESENT
) || (!(state
& FMD_ASRU_FAULTY
) &&
556 alp
->al_reason
== FMD_ASRU_REMOVED
))
557 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
558 SUSPECT_STATE_REMOVED
;
559 else if ((state
& FMD_ASRU_UNUSABLE
) && (state
& FMD_ASRU_FAULTY
))
560 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
561 SUSPECT_STATE_ISOLATED
;
562 else if (state
& FMD_ASRU_FAULTY
)
563 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
564 SUSPECT_STATE_FAULTY
;
565 else if (alp
->al_reason
== FMD_ASRU_REPLACED
)
566 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
567 SUSPECT_STATE_REPLACED
;
568 else if (alp
->al_reason
== FMD_ASRU_ACQUITTED
)
569 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
570 SUSPECT_STATE_ACQUITED
;
572 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
] =
573 SUSPECT_STATE_REPAIRED
;
575 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
, i
++)
576 if (fmd_case_match_suspect(cis
->cis_nvl
, alp
->al_event
) == 1)
579 fcmsp
->fcms_new_susp_state
[i
] =
580 fcmsp
->fcms_old_susp_state
[*fcmsp
->fcms_countp
];
582 fcmsp
->fcms_old_match_state
[*fcmsp
->fcms_countp
] |=
583 SUSPECT_STATE_NO_MATCH
;
584 (*fcmsp
->fcms_countp
)++;
589 fmd_case_impl_t
*fca_cip
;
593 * Re-fault all acquitted suspects that are still present in the new list.
596 fmd_case_fault_acquitted_matching(fmd_asru_link_t
*alp
, void *arg
)
598 fca_t
*fcap
= (fca_t
*)arg
;
599 fmd_case_impl_t
*cip
= fcap
->fca_cip
;
600 fmd_case_susp_t
*cis
;
601 int state
= fmd_asru_al_getstate(alp
);
603 if (!(state
& FMD_ASRU_FAULTY
) &&
604 alp
->al_reason
== FMD_ASRU_ACQUITTED
) {
605 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
)
606 if (fmd_case_match_suspect(cis
->cis_nvl
,
610 (void) fmd_asru_setflags(alp
, FMD_ASRU_FAULTY
);
611 *fcap
->fca_do_update
= 1;
617 * Re-fault all suspects that are still present in the new list.
620 fmd_case_fault_all_matching(fmd_asru_link_t
*alp
, void *arg
)
622 fca_t
*fcap
= (fca_t
*)arg
;
623 fmd_case_impl_t
*cip
= fcap
->fca_cip
;
624 fmd_case_susp_t
*cis
;
625 int state
= fmd_asru_al_getstate(alp
);
627 if (!(state
& FMD_ASRU_FAULTY
)) {
628 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
)
629 if (fmd_case_match_suspect(cis
->cis_nvl
,
633 (void) fmd_asru_setflags(alp
, FMD_ASRU_FAULTY
);
634 *fcap
->fca_do_update
= 1;
640 * Acquit all suspects that are no longer present in the new list.
643 fmd_case_acquit_no_match(fmd_asru_link_t
*alp
, void *arg
)
645 fca_t
*fcap
= (fca_t
*)arg
;
646 fmd_case_impl_t
*cip
= fcap
->fca_cip
;
647 fmd_case_susp_t
*cis
;
648 int state
= fmd_asru_al_getstate(alp
);
650 if (state
& FMD_ASRU_FAULTY
) {
651 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
)
652 if (fmd_case_match_suspect(cis
->cis_nvl
,
656 (void) fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
,
658 *fcap
->fca_do_update
= 1;
664 * Acquit all isolated suspects.
667 fmd_case_acquit_isolated(fmd_asru_link_t
*alp
, void *arg
)
669 int *do_update
= (int *)arg
;
670 int state
= fmd_asru_al_getstate(alp
);
672 if ((state
& FMD_ASRU_PRESENT
) && (state
& FMD_ASRU_UNUSABLE
) &&
673 (state
& FMD_ASRU_FAULTY
)) {
674 (void) fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
,
681 * Acquit suspect which matches specified nvlist
684 fmd_case_acquit_suspect(fmd_asru_link_t
*alp
, void *arg
)
686 nvlist_t
*nvl
= (nvlist_t
*)arg
;
687 int state
= fmd_asru_al_getstate(alp
);
689 if ((state
& FMD_ASRU_FAULTY
) &&
690 fmd_case_match_suspect(nvl
, alp
->al_event
) == 1)
691 (void) fmd_asru_clrflags(alp
, FMD_ASRU_FAULTY
,
696 fmd_case_impl_t
*fccd_cip
;
697 uint8_t *fccd_new_susp_state
;
698 uint8_t *fccd_new_match_state
;
699 int *fccd_discard_new
;
700 int *fccd_adjust_new
;
704 * see if a matching suspect list already exists in the cache
707 fmd_case_check_for_dups(fmd_case_t
*old_cp
, void *arg
)
709 fccd_t
*fccdp
= (fccd_t
*)arg
;
710 fmd_case_impl_t
*new_cip
= fccdp
->fccd_cip
;
711 fmd_case_impl_t
*old_cip
= (fmd_case_impl_t
*)old_cp
;
712 int i
, count
= 0, do_update
= 0, got_isolated_overlap
= 0;
713 int got_faulty_overlap
= 0;
714 int got_acquit_overlap
= 0;
715 boolean_t too_recent
;
716 uint64_t most_recent
= 0;
719 uint8_t *new_susp_state
;
720 uint8_t *old_susp_state
;
721 uint8_t *old_match_state
;
723 new_susp_state
= alloca(new_cip
->ci_nsuspects
* sizeof (uint8_t));
724 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++)
725 new_susp_state
[i
] = 0;
726 old_susp_state
= alloca(old_cip
->ci_nsuspects
* sizeof (uint8_t));
727 for (i
= 0; i
< old_cip
->ci_nsuspects
; i
++)
728 old_susp_state
[i
] = 0;
729 old_match_state
= alloca(old_cip
->ci_nsuspects
* sizeof (uint8_t));
730 for (i
= 0; i
< old_cip
->ci_nsuspects
; i
++)
731 old_match_state
[i
] = 0;
734 * Compare with each suspect in the existing case.
736 fcms
.fcms_countp
= &count
;
737 fcms
.fcms_maxcount
= old_cip
->ci_nsuspects
;
738 fcms
.fcms_cip
= new_cip
;
739 fcms
.fcms_new_susp_state
= new_susp_state
;
740 fcms
.fcms_old_susp_state
= old_susp_state
;
741 fcms
.fcms_old_match_state
= old_match_state
;
742 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, (fmd_case_t
*)old_cip
,
743 fmd_case_match_suspects
, &fcms
);
746 * If we have some faulty, non-isolated suspects that overlap, then most
747 * likely it is the suspects that overlap in the suspect lists that are
748 * to blame. So we can consider this to be a match.
750 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++)
751 if (new_susp_state
[i
] == SUSPECT_STATE_FAULTY
)
752 got_faulty_overlap
= 1;
753 if (got_faulty_overlap
&& fmd_case_match_on_faulty_overlap
)
757 * If we have no faulty, non-isolated suspects in the old case, but we
758 * do have some acquitted suspects that overlap, then most likely it is
759 * the acquitted suspects that overlap in the suspect lists that are
760 * to blame. So we can consider this to be a match.
762 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++)
763 if (new_susp_state
[i
] == SUSPECT_STATE_ACQUITED
)
764 got_acquit_overlap
= 1;
765 for (i
= 0; i
< old_cip
->ci_nsuspects
; i
++)
766 if (old_susp_state
[i
] == SUSPECT_STATE_FAULTY
)
767 got_acquit_overlap
= 0;
768 if (got_acquit_overlap
&& fmd_case_match_on_acquit_overlap
)
772 * Check that all suspects in the new list are present in the old list.
773 * Return if we find one that isn't.
775 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++)
776 if (new_susp_state
[i
] == 0)
780 * Check that all suspects in the old list are present in the new list
781 * *or* they are isolated or removed/replaced (which would explain why
782 * they are not present in the new list). Return if we find one that is
783 * faulty and unisolated or repaired or acquitted, and that is not
784 * present in the new case.
786 for (i
= 0; i
< old_cip
->ci_nsuspects
; i
++)
787 if (old_match_state
[i
] == SUSPECT_STATE_NO_MATCH
&&
788 (old_susp_state
[i
] == SUSPECT_STATE_FAULTY
||
789 old_susp_state
[i
] == SUSPECT_STATE_ACQUITED
||
790 old_susp_state
[i
] == SUSPECT_STATE_REPAIRED
))
795 * If the old case is already in repaired/resolved state, we can't
796 * do anything more with it, so keep the new case, but acquit some
797 * of the suspects if appropriate.
799 if (old_cip
->ci_state
>= FMD_CASE_REPAIRED
) {
800 if (fmd_case_auto_acquit_non_acquitted
) {
801 *fccdp
->fccd_adjust_new
= 1;
802 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++) {
803 fccdp
->fccd_new_susp_state
[i
] |=
805 if (new_susp_state
[i
] == 0)
806 fccdp
->fccd_new_susp_state
[i
] =
807 SUSPECT_STATE_NO_MATCH
;
814 * Otherwise discard the new case and keep the old, again updating the
815 * state of the suspects as appropriate
817 *fccdp
->fccd_discard_new
= 1;
818 fca
.fca_cip
= new_cip
;
819 fca
.fca_do_update
= &do_update
;
822 * See if new case occurred within fmd_case_too_recent seconds of the
823 * most recent modification to the old case and if so don't do
824 * auto-acquit. This avoids problems if a flood of ereports come in and
825 * they don't all get diagnosed before the first case causes some of
826 * the devices to be isolated making it appear that an isolated device
827 * was in the suspect list.
829 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
830 fmd_asru_most_recent
, &most_recent
);
831 too_recent
= (new_cip
->ci_tv
.tv_sec
- most_recent
<
832 fmd_case_too_recent
);
834 if (got_faulty_overlap
) {
836 * Acquit any suspects not present in the new list, plus
837 * any that are present but are isolated.
839 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
840 fmd_case_acquit_no_match
, &fca
);
841 if (fmd_case_auto_acquit_isolated
&& !too_recent
)
842 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
843 fmd_case_acquit_isolated
, &do_update
);
844 } else if (got_acquit_overlap
) {
846 * Re-fault the acquitted matching suspects and acquit all
849 if (fmd_case_auto_acquit_isolated
&& !too_recent
) {
850 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
851 fmd_case_fault_acquitted_matching
, &fca
);
852 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
853 fmd_case_acquit_isolated
, &do_update
);
855 } else if (fmd_case_auto_acquit_isolated
) {
857 * To get here, there must be no faulty or acquitted suspects,
858 * but there must be at least one isolated suspect. Just acquit
859 * non-matching isolated suspects. If there are no matching
860 * isolated suspects, then re-fault all matching suspects.
862 for (i
= 0; i
< new_cip
->ci_nsuspects
; i
++)
863 if (new_susp_state
[i
] == SUSPECT_STATE_ISOLATED
)
864 got_isolated_overlap
= 1;
865 if (!got_isolated_overlap
)
866 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
867 fmd_case_fault_all_matching
, &fca
);
868 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, old_cp
,
869 fmd_case_acquit_no_match
, &fca
);
873 * If we've updated anything in the old case, call fmd_case_update()
876 fmd_case_update(old_cp
);
880 * Convict suspects in a case by applying a conviction policy and updating the
881 * resource cache prior to emitting the list.suspect event for the given case.
882 * At present, our policy is very simple: convict every suspect in the case.
883 * In the future, this policy can be extended and made configurable to permit:
885 * - convicting the suspect with the highest FIT rate
886 * - convicting the suspect with the cheapest FRU
887 * - convicting the suspect with the FRU that is in a depot's inventory
888 * - convicting the suspect with the longest lifetime
890 * and so forth. A word to the wise: this problem is significantly harder than
891 * it seems at first glance. Future work should heed the following advice:
893 * Hacking the policy into C code here is a very bad idea. The policy needs to
894 * be decided upon very carefully and fundamentally encodes knowledge of what
895 * suspect list combinations can be emitted by what diagnosis engines. As such
896 * fmd's code is the wrong location, because that would require fmd itself to
897 * be updated for every diagnosis engine change, defeating the entire design.
898 * The FMA Event Registry knows the suspect list combinations: policy inputs
899 * can be derived from it and used to produce per-module policy configuration.
901 * If the policy needs to be dynamic and not statically fixed at either fmd
902 * startup or module load time, any implementation of dynamic policy retrieval
903 * must employ some kind of caching mechanism or be part of a built-in module.
904 * The fmd_case_convict() function is called with locks held inside of fmd and
905 * is not a place where unbounded blocking on some inter-process or inter-
906 * system communication to another service (e.g. another daemon) can occur.
909 fmd_case_convict(fmd_case_t
*cp
)
911 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
912 fmd_asru_hash_t
*ahp
= fmd
.d_asrus
;
913 int discard_new
= 0, i
;
914 fmd_case_susp_t
*cis
;
915 fmd_asru_link_t
*alp
;
916 uint8_t *new_susp_state
;
917 uint8_t *new_match_state
;
920 fmd_case_impl_t
*ncp
, **cps
, **cpp
;
922 fmd_case_hash_t
*chp
;
925 * First we must see if any matching cases already exist.
927 new_susp_state
= alloca(cip
->ci_nsuspects
* sizeof (uint8_t));
928 for (i
= 0; i
< cip
->ci_nsuspects
; i
++)
929 new_susp_state
[i
] = 0;
930 new_match_state
= alloca(cip
->ci_nsuspects
* sizeof (uint8_t));
931 for (i
= 0; i
< cip
->ci_nsuspects
; i
++)
932 new_match_state
[i
] = 0;
934 fccd
.fccd_adjust_new
= &adjust_new
;
935 fccd
.fccd_new_susp_state
= new_susp_state
;
936 fccd
.fccd_new_match_state
= new_match_state
;
937 fccd
.fccd_discard_new
= &discard_new
;
943 (void) pthread_rwlock_rdlock(&chp
->ch_lock
);
944 cps
= cpp
= fmd_alloc(chp
->ch_count
* sizeof (fmd_case_t
*), FMD_SLEEP
);
946 for (i
= 0; i
< chp
->ch_hashlen
; i
++)
947 for (ncp
= chp
->ch_hash
[i
]; ncp
!= NULL
; ncp
= ncp
->ci_next
)
948 *cpp
++ = fmd_case_tryhold(ncp
);
949 ASSERT(cpp
== cps
+ cpc
);
950 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
953 * Run fmd_case_check_for_dups() on all cases except the current one.
955 for (i
= 0; i
< cpc
; i
++) {
956 if (cps
[i
] != NULL
) {
957 if (cps
[i
] != (fmd_case_impl_t
*)cp
)
958 fmd_case_check_for_dups((fmd_case_t
*)cps
[i
],
960 fmd_case_rele((fmd_case_t
*)cps
[i
]);
963 fmd_free(cps
, cpc
* sizeof (fmd_case_t
*));
965 (void) pthread_mutex_lock(&cip
->ci_lock
);
966 if (cip
->ci_code
== NULL
)
967 (void) fmd_case_mkcode(cp
);
968 else if (cip
->ci_precanned
)
969 fmd_case_code_hash_insert(fmd
.d_cases
, cip
);
973 * We've found an existing case that is a match and it is not
974 * already in repaired or resolved state. So we can close this
975 * one as a duplicate.
977 (void) pthread_mutex_unlock(&cip
->ci_lock
);
982 * Allocate new cache entries
984 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= cis
->cis_next
) {
985 if ((alp
= fmd_asru_hash_create_entry(ahp
,
986 cp
, cis
->cis_nvl
)) == NULL
) {
987 fmd_error(EFMD_CASE_EVENT
, "cannot convict suspect in "
988 "%s: %s\n", cip
->ci_uuid
, fmd_strerror(errno
));
991 alp
->al_flags
|= FMD_ASRU_PRESENT
;
992 alp
->al_asru
->asru_flags
|= FMD_ASRU_PRESENT
;
993 (void) fmd_asru_clrflags(alp
, FMD_ASRU_UNUSABLE
, 0);
994 (void) fmd_asru_setflags(alp
, FMD_ASRU_FAULTY
);
998 int some_suspect
= 0, some_not_suspect
= 0;
1001 * There is one or more matching case but they are already in
1002 * repaired or resolved state. So we need to keep the new
1003 * case, but we can adjust it. Repaired/removed/replaced
1004 * suspects are unlikely to be to blame (unless there are
1005 * actually two separate faults). So if we have a combination of
1006 * repaired/replaced/removed suspects and acquitted suspects in
1007 * the old lists, then we should acquit in the new list those
1008 * that were repaired/replaced/removed in the old.
1010 for (i
= 0; i
< cip
->ci_nsuspects
; i
++) {
1011 if ((new_susp_state
[i
] & SUSPECT_STATE_REPLACED
) ||
1012 (new_susp_state
[i
] & SUSPECT_STATE_REPAIRED
) ||
1013 (new_susp_state
[i
] & SUSPECT_STATE_REMOVED
) ||
1014 (new_match_state
[i
] & SUSPECT_STATE_NO_MATCH
))
1015 some_not_suspect
= 1;
1019 if (some_suspect
&& some_not_suspect
) {
1020 for (cis
= cip
->ci_suspects
, i
= 0; cis
!= NULL
;
1021 cis
= cis
->cis_next
, i
++)
1022 if ((new_susp_state
[i
] &
1023 SUSPECT_STATE_REPLACED
) ||
1024 (new_susp_state
[i
] &
1025 SUSPECT_STATE_REPAIRED
) ||
1026 (new_susp_state
[i
] &
1027 SUSPECT_STATE_REMOVED
) ||
1028 (new_match_state
[i
] &
1029 SUSPECT_STATE_NO_MATCH
))
1030 fmd_asru_hash_apply_by_case(fmd
.d_asrus
,
1031 cp
, fmd_case_acquit_suspect
,
1036 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1041 fmd_case_publish(fmd_case_t
*cp
, uint_t state
)
1043 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1048 if (state
== FMD_CASE_CURRENT
)
1049 state
= cip
->ci_state
; /* use current state */
1052 case FMD_CASE_SOLVED
:
1053 (void) pthread_mutex_lock(&cip
->ci_lock
);
1056 * If we already have a code, then case is already solved.
1058 if (cip
->ci_precanned
== 0 && cip
->ci_xprt
== NULL
&&
1059 cip
->ci_code
!= NULL
) {
1060 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1064 if (cip
->ci_tv_valid
== 0) {
1065 fmd_time_gettimeofday(&cip
->ci_tv
);
1066 cip
->ci_tv_valid
= 1;
1068 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1070 if (fmd_case_convict(cp
) == 1) { /* dupclose */
1071 cip
->ci_flags
&= ~FMD_CF_SOLVED
;
1072 fmd_case_transition(cp
, FMD_CASE_CLOSE_WAIT
, 0);
1075 if (cip
->ci_xprt
!= NULL
) {
1077 * For proxy, save some information about the transport
1078 * in the resource cache.
1081 fmd_asru_set_on_proxy_t fasp
;
1082 fmd_xprt_impl_t
*xip
= (fmd_xprt_impl_t
*)cip
->ci_xprt
;
1084 fasp
.fasp_countp
= &count
;
1085 fasp
.fasp_maxcount
= cip
->ci_nsuspects
;
1086 fasp
.fasp_proxy_asru
= cip
->ci_proxy_asru
;
1087 fasp
.fasp_proxy_external
= xip
->xi_flags
&
1089 fasp
.fasp_proxy_rdonly
= ((xip
->xi_flags
&
1090 FMD_XPRT_RDWR
) == FMD_XPRT_RDONLY
);
1091 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1092 fmd_asru_set_on_proxy
, &fasp
);
1094 nvl
= fmd_case_mkevent(cp
, FM_LIST_SUSPECT_CLASS
);
1095 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
1097 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
1098 (void) pthread_rwlock_rdlock(&fmd
.d_log_lock
);
1099 fmd_log_append(fmd
.d_fltlog
, e
, cp
);
1100 (void) pthread_rwlock_unlock(&fmd
.d_log_lock
);
1101 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
1103 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
1104 cip
->ci_mod
->mod_stats
->ms_casesolved
.fmds_value
.ui64
++;
1105 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
1109 case FMD_CASE_CLOSE_WAIT
:
1111 e
= fmd_event_create(FMD_EVT_CLOSE
, FMD_HRT_NOW
, NULL
, cp
);
1112 fmd_eventq_insert_at_head(cip
->ci_mod
->mod_queue
, e
);
1114 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
1115 cip
->ci_mod
->mod_stats
->ms_caseclosed
.fmds_value
.ui64
++;
1116 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
1120 case FMD_CASE_CLOSED
:
1121 nvl
= fmd_case_mkevent(cp
, FM_LIST_ISOLATED_CLASS
);
1122 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
1123 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
1124 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
1127 case FMD_CASE_REPAIRED
:
1128 nvl
= fmd_case_mkevent(cp
, FM_LIST_REPAIRED_CLASS
);
1129 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
1130 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
1131 (void) pthread_rwlock_rdlock(&fmd
.d_log_lock
);
1132 fmd_log_append(fmd
.d_fltlog
, e
, cp
);
1133 (void) pthread_rwlock_unlock(&fmd
.d_log_lock
);
1134 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
1137 case FMD_CASE_RESOLVED
:
1138 nvl
= fmd_case_mkevent(cp
, FM_LIST_RESOLVED_CLASS
);
1139 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
1140 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
1141 (void) pthread_rwlock_rdlock(&fmd
.d_log_lock
);
1142 fmd_log_append(fmd
.d_fltlog
, e
, cp
);
1143 (void) pthread_rwlock_unlock(&fmd
.d_log_lock
);
1144 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
1150 fmd_case_hash_lookup(fmd_case_hash_t
*chp
, const char *uuid
)
1152 fmd_case_impl_t
*cip
;
1155 (void) pthread_rwlock_rdlock(&chp
->ch_lock
);
1156 h
= fmd_strhash(uuid
) % chp
->ch_hashlen
;
1158 for (cip
= chp
->ch_hash
[h
]; cip
!= NULL
; cip
= cip
->ci_next
) {
1159 if (strcmp(cip
->ci_uuid
, uuid
) == 0)
1164 * If deleting bit is set, treat the case as if it doesn't exist.
1167 cip
= fmd_case_tryhold(cip
);
1170 (void) fmd_set_errno(EFMD_CASE_INVAL
);
1172 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
1173 return ((fmd_case_t
*)cip
);
1176 static fmd_case_impl_t
*
1177 fmd_case_hash_insert(fmd_case_hash_t
*chp
, fmd_case_impl_t
*cip
)
1179 fmd_case_impl_t
*eip
;
1182 (void) pthread_rwlock_wrlock(&chp
->ch_lock
);
1183 h
= fmd_strhash(cip
->ci_uuid
) % chp
->ch_hashlen
;
1185 for (eip
= chp
->ch_hash
[h
]; eip
!= NULL
; eip
= eip
->ci_next
) {
1186 if (strcmp(cip
->ci_uuid
, eip
->ci_uuid
) == 0 &&
1187 fmd_case_tryhold(eip
) != NULL
) {
1188 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
1189 return (eip
); /* uuid already present */
1193 cip
->ci_next
= chp
->ch_hash
[h
];
1194 chp
->ch_hash
[h
] = cip
;
1197 ASSERT(chp
->ch_count
!= 0);
1199 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
1204 fmd_case_hash_delete(fmd_case_hash_t
*chp
, fmd_case_impl_t
*cip
)
1206 fmd_case_impl_t
*cp
, **pp
;
1209 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
1211 cip
->ci_flags
|= FMD_CF_DELETING
;
1212 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1214 (void) pthread_rwlock_wrlock(&chp
->ch_lock
);
1216 h
= fmd_strhash(cip
->ci_uuid
) % chp
->ch_hashlen
;
1217 pp
= &chp
->ch_hash
[h
];
1219 for (cp
= *pp
; cp
!= NULL
; cp
= cp
->ci_next
) {
1227 fmd_panic("case %p (%s) not found on hash chain %u\n",
1228 (void *)cip
, cip
->ci_uuid
, h
);
1235 * delete from code hash if it is on it
1237 fmd_case_code_hash_delete(chp
, cip
);
1239 ASSERT(chp
->ch_count
!= 0);
1242 (void) pthread_rwlock_unlock(&chp
->ch_lock
);
1244 (void) pthread_mutex_lock(&cip
->ci_lock
);
1245 ASSERT(cip
->ci_flags
& FMD_CF_DELETING
);
1249 fmd_case_create(fmd_module_t
*mp
, const char *uuidstr
, void *data
)
1251 fmd_case_impl_t
*cip
= fmd_zalloc(sizeof (fmd_case_impl_t
), FMD_SLEEP
);
1252 fmd_case_impl_t
*eip
= NULL
;
1255 (void) pthread_mutex_init(&cip
->ci_lock
, NULL
);
1256 fmd_buf_hash_create(&cip
->ci_bufs
);
1258 fmd_module_hold(mp
);
1261 cip
->ci_state
= FMD_CASE_UNSOLVED
;
1262 cip
->ci_flags
= FMD_CF_DIRTY
;
1263 cip
->ci_data
= data
;
1266 * Calling libuuid: get a clue. The library interfaces cleverly do not
1267 * define any constant for the length of an unparse string, and do not
1268 * permit the caller to specify a buffer length for safety. The spec
1269 * says it will be 36 bytes, but we make it tunable just in case.
1271 (void) fmd_conf_getprop(fmd
.d_conf
, "uuidlen", &cip
->ci_uuidlen
);
1272 cip
->ci_uuid
= fmd_zalloc(cip
->ci_uuidlen
+ 1, FMD_SLEEP
);
1274 if (uuidstr
== NULL
) {
1276 * We expect this loop to execute only once, but code it
1277 * defensively against the possibility of libuuid bugs.
1278 * Keep generating uuids and attempting to do a hash insert
1279 * until we get a unique one.
1283 fmd_case_rele((fmd_case_t
*)eip
);
1284 uuid_generate(uuid
);
1285 uuid_unparse(uuid
, cip
->ci_uuid
);
1286 } while ((eip
= fmd_case_hash_insert(fmd
.d_cases
, cip
)) != cip
);
1289 * If a uuid was specified we must succeed with that uuid,
1290 * or return NULL indicating a case with that uuid already
1293 (void) strncpy(cip
->ci_uuid
, uuidstr
, cip
->ci_uuidlen
+ 1);
1294 if (fmd_case_hash_insert(fmd
.d_cases
, cip
) != cip
) {
1295 fmd_free(cip
->ci_uuid
, cip
->ci_uuidlen
+ 1);
1296 (void) fmd_buf_hash_destroy(&cip
->ci_bufs
);
1297 fmd_module_rele(mp
);
1298 pthread_mutex_destroy(&cip
->ci_lock
);
1299 fmd_free(cip
, sizeof (*cip
));
1304 ASSERT(fmd_module_locked(mp
));
1305 fmd_list_append(&mp
->mod_cases
, cip
);
1306 fmd_module_setcdirty(mp
);
1308 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
1309 cip
->ci_mod
->mod_stats
->ms_caseopen
.fmds_value
.ui64
++;
1310 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
1312 return ((fmd_case_t
*)cip
);
1316 fmd_case_destroy_suspects(fmd_case_impl_t
*cip
)
1318 fmd_case_susp_t
*cis
, *ncis
;
1320 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
1322 if (cip
->ci_proxy_asru
)
1323 fmd_free(cip
->ci_proxy_asru
, sizeof (uint8_t) *
1325 nvlist_free(cip
->ci_diag_de
);
1326 if (cip
->ci_diag_asru
)
1327 fmd_free(cip
->ci_diag_asru
, sizeof (uint8_t) *
1330 for (cis
= cip
->ci_suspects
; cis
!= NULL
; cis
= ncis
) {
1331 ncis
= cis
->cis_next
;
1332 nvlist_free(cis
->cis_nvl
);
1333 fmd_free(cis
, sizeof (fmd_case_susp_t
));
1336 cip
->ci_suspects
= NULL
;
1337 cip
->ci_nsuspects
= 0;
1341 fmd_case_recreate(fmd_module_t
*mp
, fmd_xprt_t
*xp
,
1342 uint_t state
, const char *uuid
, const char *code
)
1344 fmd_case_impl_t
*cip
= fmd_zalloc(sizeof (fmd_case_impl_t
), FMD_SLEEP
);
1345 fmd_case_impl_t
*eip
;
1347 (void) pthread_mutex_init(&cip
->ci_lock
, NULL
);
1348 fmd_buf_hash_create(&cip
->ci_bufs
);
1350 fmd_module_hold(mp
);
1354 cip
->ci_state
= state
;
1355 cip
->ci_uuid
= fmd_strdup(uuid
, FMD_SLEEP
);
1356 cip
->ci_uuidlen
= strlen(cip
->ci_uuid
);
1357 cip
->ci_code
= fmd_strdup(code
, FMD_SLEEP
);
1358 cip
->ci_codelen
= cip
->ci_code
? strlen(cip
->ci_code
) + 1 : 0;
1360 if (state
> FMD_CASE_CLOSE_WAIT
)
1361 cip
->ci_flags
|= FMD_CF_SOLVED
;
1364 * Insert the case into the global case hash. If the specified UUID is
1365 * already present, check to see if it is an orphan: if so, reclaim it;
1366 * otherwise if it is owned by a different module then return NULL.
1368 if ((eip
= fmd_case_hash_insert(fmd
.d_cases
, cip
)) != cip
) {
1369 (void) pthread_mutex_lock(&cip
->ci_lock
);
1370 cip
->ci_refs
--; /* decrement to zero */
1371 fmd_case_destroy((fmd_case_t
*)cip
, B_FALSE
);
1373 cip
= eip
; /* switch 'cip' to the existing case */
1374 (void) pthread_mutex_lock(&cip
->ci_lock
);
1377 * If the ASRU cache is trying to recreate an orphan, then just
1378 * return the existing case that we found without changing it.
1380 if (mp
== fmd
.d_rmod
) {
1382 * In case the case has already been created from
1383 * a checkpoint file we need to set up code now.
1385 if (cip
->ci_state
< FMD_CASE_CLOSED
) {
1386 if (code
!= NULL
&& cip
->ci_code
== NULL
) {
1387 cip
->ci_code
= fmd_strdup(code
,
1389 cip
->ci_codelen
= cip
->ci_code
?
1390 strlen(cip
->ci_code
) + 1 : 0;
1391 fmd_case_code_hash_insert(fmd
.d_cases
,
1397 * When recreating an orphan case, state passed in may
1398 * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
1399 * any suspects are still CLOSED (faulty) then the
1400 * overall state needs to be CLOSED.
1402 if ((cip
->ci_state
== FMD_CASE_REPAIRED
||
1403 cip
->ci_state
== FMD_CASE_RESOLVED
) &&
1404 state
== FMD_CASE_CLOSED
)
1405 cip
->ci_state
= FMD_CASE_CLOSED
;
1406 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1407 fmd_case_rele((fmd_case_t
*)cip
);
1408 return ((fmd_case_t
*)cip
);
1412 * If the existing case isn't an orphan or is being proxied,
1413 * then we have a UUID conflict: return failure to the caller.
1415 if (cip
->ci_mod
!= fmd
.d_rmod
|| xp
!= NULL
) {
1416 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1417 fmd_case_rele((fmd_case_t
*)cip
);
1422 * If the new module is reclaiming an orphaned case, remove
1423 * the case from the root module, switch ci_mod, and then fall
1424 * through to adding the case to the new owner module 'mp'.
1426 fmd_module_lock(cip
->ci_mod
);
1427 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
1428 fmd_module_unlock(cip
->ci_mod
);
1430 fmd_module_rele(cip
->ci_mod
);
1432 fmd_module_hold(mp
);
1435 * It's possible that fmd crashed or was restarted during a
1436 * previous solve operation between the asru cache being created
1437 * and the ckpt file being updated to SOLVED. Thus when the DE
1438 * recreates the case here from the checkpoint file, the state
1439 * will be UNSOLVED and yet we are having to reclaim because
1440 * the case was in the asru cache. If this happens, revert the
1441 * case back to the UNSOLVED state and let the DE solve it again
1443 if (state
== FMD_CASE_UNSOLVED
) {
1444 fmd_asru_hash_delete_case(fmd
.d_asrus
,
1446 fmd_case_destroy_suspects(cip
);
1447 fmd_case_code_hash_delete(fmd
.d_cases
, cip
);
1448 fmd_free(cip
->ci_code
, cip
->ci_codelen
);
1449 cip
->ci_code
= NULL
;
1450 cip
->ci_codelen
= 0;
1451 cip
->ci_tv_valid
= 0;
1454 cip
->ci_state
= state
;
1456 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1457 fmd_case_rele((fmd_case_t
*)cip
);
1460 * add into hash of solved cases
1463 fmd_case_code_hash_insert(fmd
.d_cases
, cip
);
1466 ASSERT(fmd_module_locked(mp
));
1467 fmd_list_append(&mp
->mod_cases
, cip
);
1469 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
1470 cip
->ci_mod
->mod_stats
->ms_caseopen
.fmds_value
.ui64
++;
1471 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
1473 return ((fmd_case_t
*)cip
);
1477 fmd_case_destroy(fmd_case_t
*cp
, int visible
)
1479 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1480 fmd_case_item_t
*cit
, *ncit
;
1482 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
1483 ASSERT(cip
->ci_refs
== 0);
1486 TRACE((FMD_DBG_CASE
, "deleting case %s", cip
->ci_uuid
));
1487 fmd_case_hash_delete(fmd
.d_cases
, cip
);
1490 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= ncit
) {
1491 ncit
= cit
->cit_next
;
1492 fmd_event_rele(cit
->cit_event
);
1493 fmd_free(cit
, sizeof (fmd_case_item_t
));
1496 fmd_case_destroy_suspects(cip
);
1498 if (cip
->ci_principal
!= NULL
)
1499 fmd_event_rele(cip
->ci_principal
);
1501 fmd_free(cip
->ci_uuid
, cip
->ci_uuidlen
+ 1);
1502 fmd_free(cip
->ci_code
, cip
->ci_codelen
);
1503 (void) fmd_buf_hash_destroy(&cip
->ci_bufs
);
1505 fmd_module_rele(cip
->ci_mod
);
1506 fmd_free(cip
, sizeof (fmd_case_impl_t
));
1510 fmd_case_hold(fmd_case_t
*cp
)
1512 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1514 (void) pthread_mutex_lock(&cip
->ci_lock
);
1515 fmd_case_hold_locked(cp
);
1516 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1520 fmd_case_hold_locked(fmd_case_t
*cp
)
1522 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1524 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
1525 if (cip
->ci_flags
& FMD_CF_DELETING
)
1526 fmd_panic("attempt to hold a deleting case %p (%s)\n",
1527 (void *)cip
, cip
->ci_uuid
);
1529 ASSERT(cip
->ci_refs
!= 0);
1532 static fmd_case_impl_t
*
1533 fmd_case_tryhold(fmd_case_impl_t
*cip
)
1536 * If the case's "deleting" bit is unset, hold and return case,
1537 * otherwise, return NULL.
1539 (void) pthread_mutex_lock(&cip
->ci_lock
);
1540 if (cip
->ci_flags
& FMD_CF_DELETING
) {
1541 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1544 fmd_case_hold_locked((fmd_case_t
*)cip
);
1545 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1551 fmd_case_rele(fmd_case_t
*cp
)
1553 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1555 (void) pthread_mutex_lock(&cip
->ci_lock
);
1556 ASSERT(cip
->ci_refs
!= 0);
1558 if (--cip
->ci_refs
== 0)
1559 fmd_case_destroy((fmd_case_t
*)cip
, B_TRUE
);
1561 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1565 fmd_case_rele_locked(fmd_case_t
*cp
)
1567 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1569 ASSERT(MUTEX_HELD(&cip
->ci_lock
));
1571 ASSERT(cip
->ci_refs
!= 0);
1575 fmd_case_insert_principal(fmd_case_t
*cp
, fmd_event_t
*ep
)
1577 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1578 fmd_case_item_t
*cit
;
1584 (void) pthread_mutex_lock(&cip
->ci_lock
);
1586 if (cip
->ci_flags
& FMD_CF_SOLVED
)
1587 state
= FMD_EVS_DIAGNOSED
;
1589 state
= FMD_EVS_ACCEPTED
;
1591 oep
= cip
->ci_principal
;
1592 cip
->ci_principal
= ep
;
1594 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= cit
->cit_next
) {
1595 if (cit
->cit_event
== ep
)
1599 cip
->ci_flags
|= FMD_CF_DIRTY
;
1600 new = cit
== NULL
&& ep
!= oep
;
1602 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1604 fmd_module_setcdirty(cip
->ci_mod
);
1605 fmd_event_transition(ep
, state
);
1608 fmd_event_rele(oep
);
1614 fmd_case_insert_event(fmd_case_t
*cp
, fmd_event_t
*ep
)
1616 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1617 fmd_case_item_t
*cit
;
1622 (void) pthread_mutex_lock(&cip
->ci_lock
);
1624 if (cip
->ci_flags
& FMD_CF_SOLVED
)
1625 state
= FMD_EVS_DIAGNOSED
;
1627 state
= FMD_EVS_ACCEPTED
;
1629 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= cit
->cit_next
) {
1630 if (cit
->cit_event
== ep
)
1634 new = cit
== NULL
&& ep
!= cip
->ci_principal
;
1637 * If the event is already in the case or the case is already solved,
1638 * there is no reason to save it: just transition it appropriately.
1640 if (cit
!= NULL
|| (cip
->ci_flags
& FMD_CF_SOLVED
)) {
1641 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1642 fmd_event_transition(ep
, state
);
1646 cit
= fmd_alloc(sizeof (fmd_case_item_t
), FMD_SLEEP
);
1649 if (nvlist_lookup_boolean_value(((fmd_event_impl_t
*)ep
)->ev_nvl
,
1650 "__injected", &injected
) == 0 && injected
)
1651 fmd_case_set_injected(cp
);
1653 cit
->cit_next
= cip
->ci_items
;
1654 cit
->cit_event
= ep
;
1656 cip
->ci_items
= cit
;
1659 cip
->ci_flags
|= FMD_CF_DIRTY
;
1660 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1662 fmd_module_setcdirty(cip
->ci_mod
);
1663 fmd_event_transition(ep
, state
);
1669 fmd_case_insert_suspect(fmd_case_t
*cp
, nvlist_t
*nvl
)
1671 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1672 fmd_case_susp_t
*cis
= fmd_alloc(sizeof (fmd_case_susp_t
), FMD_SLEEP
);
1674 (void) pthread_mutex_lock(&cip
->ci_lock
);
1675 ASSERT(cip
->ci_state
< FMD_CASE_CLOSE_WAIT
);
1676 cip
->ci_flags
|= FMD_CF_DIRTY
;
1678 cis
->cis_next
= cip
->ci_suspects
;
1681 cip
->ci_suspects
= cis
;
1682 cip
->ci_nsuspects
++;
1684 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1685 if (cip
->ci_xprt
== NULL
)
1686 fmd_module_setcdirty(cip
->ci_mod
);
1690 fmd_case_recreate_suspect(fmd_case_t
*cp
, nvlist_t
*nvl
)
1692 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1693 fmd_case_susp_t
*cis
= fmd_alloc(sizeof (fmd_case_susp_t
), FMD_SLEEP
);
1696 (void) pthread_mutex_lock(&cip
->ci_lock
);
1698 cis
->cis_next
= cip
->ci_suspects
;
1701 if (nvlist_lookup_boolean_value(nvl
,
1702 FM_SUSPECT_MESSAGE
, &b
) == 0 && b
== B_FALSE
)
1703 cip
->ci_flags
|= FMD_CF_INVISIBLE
;
1705 cip
->ci_suspects
= cis
;
1706 cip
->ci_nsuspects
++;
1708 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1712 fmd_case_reset_suspects(fmd_case_t
*cp
)
1714 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1716 (void) pthread_mutex_lock(&cip
->ci_lock
);
1717 ASSERT(cip
->ci_state
< FMD_CASE_SOLVED
);
1719 fmd_case_destroy_suspects(cip
);
1720 cip
->ci_flags
|= FMD_CF_DIRTY
;
1722 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1723 fmd_module_setcdirty(cip
->ci_mod
);
1728 fmd_case_unusable(fmd_asru_link_t
*alp
, void *arg
)
1730 (void) fmd_asru_setflags(alp
, FMD_ASRU_UNUSABLE
);
1734 * Grab ci_lock and update the case state and set the dirty bit. Then perform
1735 * whatever actions and emit whatever events are appropriate for the state.
1736 * Refer to the topmost block comment explaining the state machine for details.
1739 fmd_case_transition(fmd_case_t
*cp
, uint_t state
, uint_t flags
)
1741 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1742 fmd_case_item_t
*cit
;
1745 int any_unusable_and_present
= 0;
1747 ASSERT(state
<= FMD_CASE_RESOLVED
);
1748 (void) pthread_mutex_lock(&cip
->ci_lock
);
1750 if (!(cip
->ci_flags
& FMD_CF_SOLVED
) && !(flags
& FMD_CF_SOLVED
))
1751 flags
&= ~(FMD_CF_ISOLATED
| FMD_CF_REPAIRED
| FMD_CF_RESOLVED
);
1753 cip
->ci_flags
|= flags
;
1755 if (cip
->ci_state
>= state
) {
1756 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1757 return; /* already in specified state */
1760 TRACE((FMD_DBG_CASE
, "case %s %s->%s", cip
->ci_uuid
,
1761 _fmd_case_snames
[cip
->ci_state
], _fmd_case_snames
[state
]));
1763 cip
->ci_state
= state
;
1764 cip
->ci_flags
|= FMD_CF_DIRTY
;
1766 if (cip
->ci_xprt
== NULL
&& cip
->ci_mod
!= fmd
.d_rmod
)
1767 fmd_module_setcdirty(cip
->ci_mod
);
1770 case FMD_CASE_SOLVED
:
1771 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= cit
->cit_next
)
1772 fmd_event_transition(cit
->cit_event
, FMD_EVS_DIAGNOSED
);
1774 if (cip
->ci_principal
!= NULL
) {
1775 fmd_event_transition(cip
->ci_principal
,
1780 case FMD_CASE_CLOSE_WAIT
:
1782 * If the case was never solved, do not change ASRUs.
1783 * If the case was never fmd_case_closed, do not change ASRUs.
1784 * If the case was repaired, do not change ASRUs.
1786 if ((cip
->ci_flags
& (FMD_CF_SOLVED
| FMD_CF_ISOLATED
|
1787 FMD_CF_REPAIRED
)) == (FMD_CF_SOLVED
| FMD_CF_ISOLATED
))
1788 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1789 fmd_case_unusable
, NULL
);
1792 * If an orphaned case transitions to CLOSE_WAIT, the owning
1793 * module is no longer loaded: continue on to CASE_CLOSED or
1794 * CASE_REPAIRED as appropriate.
1796 if (fmd_case_orphaned(cp
)) {
1797 if (cip
->ci_flags
& FMD_CF_REPAIRED
) {
1798 state
= cip
->ci_state
= FMD_CASE_REPAIRED
;
1799 TRACE((FMD_DBG_CASE
, "case %s %s->%s",
1801 _fmd_case_snames
[FMD_CASE_CLOSE_WAIT
],
1802 _fmd_case_snames
[FMD_CASE_REPAIRED
]));
1805 state
= cip
->ci_state
= FMD_CASE_CLOSED
;
1806 TRACE((FMD_DBG_CASE
, "case %s %s->%s",
1808 _fmd_case_snames
[FMD_CASE_CLOSE_WAIT
],
1809 _fmd_case_snames
[FMD_CASE_CLOSED
]));
1814 case FMD_CASE_REPAIRED
:
1816 ASSERT(cip
->ci_xprt
!= NULL
|| fmd_case_orphaned(cp
));
1819 * If we've been requested to transition straight on to the
1820 * RESOLVED state (which can happen with fault proxying where a
1821 * list.resolved or a uuresolved is received from the other
1822 * side), or if all suspects are already either usable or not
1823 * present then transition straight to RESOLVED state,
1824 * publishing both the list.repaired and list.resolved. For a
1825 * proxy, if we discover here that all suspects are already
1826 * either usable or not present, notify the diag side instead
1827 * using fmd_xprt_uuresolved().
1829 if (flags
& FMD_CF_RESOLVED
) {
1830 if (cip
->ci_xprt
!= NULL
)
1831 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
1833 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1834 fmd_case_unusable_and_present
,
1835 &any_unusable_and_present
);
1836 if (any_unusable_and_present
)
1838 if (cip
->ci_xprt
!= NULL
) {
1839 fmd_xprt_uuresolved(cip
->ci_xprt
, cip
->ci_uuid
);
1844 cip
->ci_state
= FMD_CASE_RESOLVED
;
1845 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1846 fmd_case_publish(cp
, state
);
1847 TRACE((FMD_DBG_CASE
, "case %s %s->%s", cip
->ci_uuid
,
1848 _fmd_case_snames
[FMD_CASE_REPAIRED
],
1849 _fmd_case_snames
[FMD_CASE_RESOLVED
]));
1850 state
= FMD_CASE_RESOLVED
;
1852 (void) pthread_mutex_lock(&cip
->ci_lock
);
1855 case FMD_CASE_RESOLVED
:
1857 * For a proxy, no need to check that all suspects are already
1858 * either usable or not present - this request has come from
1859 * the diagnosing side which makes the final decision on this.
1861 if (cip
->ci_xprt
!= NULL
) {
1862 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
1867 ASSERT(fmd_case_orphaned(cp
));
1870 * If all suspects are already either usable or not present then
1871 * carry on, publish list.resolved and discard the case.
1873 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1874 fmd_case_unusable_and_present
, &any_unusable_and_present
);
1875 if (any_unusable_and_present
) {
1876 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1884 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1887 * If the module has initialized, then publish the appropriate event
1888 * for the new case state. If not, we are being called from the
1889 * checkpoint code during module load, in which case the module's
1890 * _fmd_init() routine hasn't finished yet, and our event dictionaries
1891 * may not be open yet, which will prevent us from computing the event
1892 * code. Defer the call to fmd_case_publish() by enqueuing a PUBLISH
1893 * event in our queue: this won't be processed until _fmd_init is done.
1895 if (cip
->ci_mod
->mod_flags
& FMD_MOD_INIT
)
1896 fmd_case_publish(cp
, state
);
1899 e
= fmd_event_create(FMD_EVT_PUBLISH
, FMD_HRT_NOW
, NULL
, cp
);
1900 fmd_eventq_insert_at_head(cip
->ci_mod
->mod_queue
, e
);
1904 if (cip
->ci_xprt
!= NULL
) {
1906 * If we transitioned to RESOLVED, adjust the reference
1907 * count to reflect our removal from
1908 * fmd.d_rmod->mod_cases above. If the caller has not
1909 * placed an additional hold on the case, it will now
1912 (void) pthread_mutex_lock(&cip
->ci_lock
);
1913 fmd_asru_hash_delete_case(fmd
.d_asrus
, cp
);
1914 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1917 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1918 fmd_asru_log_resolved
, NULL
);
1919 (void) pthread_mutex_lock(&cip
->ci_lock
);
1920 /* mark as "ready to be discarded */
1921 cip
->ci_flags
|= FMD_CF_RES_CMPL
;
1922 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1928 * Discard any case if it is in RESOLVED state (and if check_if_aged argument
1929 * is set if all suspects have passed the rsrc.aged time).
1932 fmd_case_discard_resolved(fmd_case_t
*cp
, void *arg
)
1934 int check_if_aged
= *(int *)arg
;
1935 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1938 * First check if case has completed transition to resolved.
1940 (void) pthread_mutex_lock(&cip
->ci_lock
);
1941 if (!(cip
->ci_flags
& FMD_CF_RES_CMPL
)) {
1942 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1947 * Now if check_is_aged is set, see if all suspects have aged.
1949 if (check_if_aged
) {
1952 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
1953 fmd_asru_check_if_aged
, &aged
);
1955 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1961 * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
1964 fmd_module_lock(cip
->ci_mod
);
1965 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
1966 fmd_module_unlock(cip
->ci_mod
);
1967 fmd_asru_hash_delete_case(fmd
.d_asrus
, cp
);
1968 cip
->ci_flags
&= ~FMD_CF_RES_CMPL
;
1969 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1974 * Transition the specified case to *at least* the specified state by first
1975 * re-validating the suspect list using the resource cache. This function is
1976 * employed by the checkpoint code when restoring a saved, solved case to see
1977 * if the state of the case has effectively changed while fmd was not running
1978 * or the module was not loaded.
1981 fmd_case_transition_update(fmd_case_t
*cp
, uint_t state
, uint_t flags
)
1983 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
1985 int usable
= 0; /* are any suspects usable? */
1987 ASSERT(state
>= FMD_CASE_SOLVED
);
1988 (void) pthread_mutex_lock(&cip
->ci_lock
);
1990 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_usable
, &usable
);
1992 (void) pthread_mutex_unlock(&cip
->ci_lock
);
1995 state
= MAX(state
, FMD_CASE_CLOSE_WAIT
);
1996 flags
|= FMD_CF_ISOLATED
;
1999 fmd_case_transition(cp
, state
, flags
);
2003 fmd_case_setdirty(fmd_case_t
*cp
)
2005 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2007 (void) pthread_mutex_lock(&cip
->ci_lock
);
2008 cip
->ci_flags
|= FMD_CF_DIRTY
;
2009 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2011 fmd_module_setcdirty(cip
->ci_mod
);
2015 fmd_case_clrdirty(fmd_case_t
*cp
)
2017 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2019 (void) pthread_mutex_lock(&cip
->ci_lock
);
2020 cip
->ci_flags
&= ~FMD_CF_DIRTY
;
2021 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2025 fmd_case_commit(fmd_case_t
*cp
)
2027 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2028 fmd_case_item_t
*cit
;
2030 (void) pthread_mutex_lock(&cip
->ci_lock
);
2032 if (cip
->ci_flags
& FMD_CF_DIRTY
) {
2033 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= cit
->cit_next
)
2034 fmd_event_commit(cit
->cit_event
);
2036 if (cip
->ci_principal
!= NULL
)
2037 fmd_event_commit(cip
->ci_principal
);
2039 fmd_buf_hash_commit(&cip
->ci_bufs
);
2040 cip
->ci_flags
&= ~FMD_CF_DIRTY
;
2043 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2047 * On proxy side, send back repair/acquit/etc request to diagnosing side
2050 fmd_case_xprt_updated(fmd_case_t
*cp
)
2052 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2059 ASSERT(cip
->ci_xprt
!= NULL
);
2060 (void) pthread_mutex_lock(&cip
->ci_lock
);
2061 ba
= alloca(sizeof (uint8_t) * cip
->ci_nsuspects
);
2062 nva
= alloca(sizeof (nvlist_t
*) * cip
->ci_nsuspects
);
2063 fcl
.fcl_countp
= &count
;
2064 fcl
.fcl_maxcount
= cip
->ci_nsuspects
;
2065 fcl
.fcl_msgp
= &msg
;
2068 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_set_lst
, &fcl
);
2069 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2070 fmd_xprt_updated(cip
->ci_xprt
, cip
->ci_uuid
, ba
, cip
->ci_proxy_asru
,
2075 * fmd_case_update_status() can be called on either the proxy side when a
2076 * list.suspect is received, or on the diagnosing side when an update request
2077 * is received from the proxy. It updates the status in the resource cache.
2080 fmd_case_update_status(fmd_case_t
*cp
, uint8_t *statusp
, uint8_t *proxy_asrup
,
2081 uint8_t *diag_asrup
)
2083 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2085 fmd_asru_update_status_t faus
;
2088 * update status of resource cache entries
2090 faus
.faus_countp
= &count
;
2091 faus
.faus_maxcount
= cip
->ci_nsuspects
;
2092 faus
.faus_ba
= statusp
;
2093 faus
.faus_proxy_asru
= proxy_asrup
;
2094 faus
.faus_diag_asru
= diag_asrup
;
2095 faus
.faus_is_proxy
= (cip
->ci_xprt
!= NULL
);
2096 (void) pthread_mutex_lock(&cip
->ci_lock
);
2097 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_asru_update_status
,
2099 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2103 * Called on either the proxy side or the diag side when a repair has taken
2104 * place on the other side but this side may know the asru "contains"
2108 fmd_case_update_containees(fmd_case_t
*cp
)
2110 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2112 (void) pthread_mutex_lock(&cip
->ci_lock
);
2113 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
2114 fmd_asru_update_containees
, NULL
);
2115 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2119 * fmd_case_close_status() is called on diagnosing side when proxy side
2120 * has had a uuclose. It updates the status in the resource cache.
2123 fmd_case_close_status(fmd_case_t
*cp
)
2125 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2127 fmd_asru_close_status_t facs
;
2130 * update status of resource cache entries
2132 facs
.facs_countp
= &count
;
2133 facs
.facs_maxcount
= cip
->ci_nsuspects
;
2134 (void) pthread_mutex_lock(&cip
->ci_lock
);
2135 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_asru_close_status
,
2137 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2141 * Indicate that the case may need to change state because one or more of the
2142 * ASRUs named as a suspect has changed state. We examine all the suspects
2143 * and if none are still faulty, we initiate a case close transition.
2146 fmd_case_update(fmd_case_t
*cp
)
2148 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2152 (void) pthread_mutex_lock(&cip
->ci_lock
);
2153 cstate
= cip
->ci_state
;
2155 if (cip
->ci_state
< FMD_CASE_SOLVED
) {
2156 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2157 return; /* update is not appropriate */
2160 if (cip
->ci_flags
& FMD_CF_REPAIRED
) {
2161 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2162 return; /* already repaired */
2165 TRACE((FMD_DBG_CASE
, "case update %s", cip
->ci_uuid
));
2166 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_faulty
, &faulty
);
2167 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2174 TRACE((FMD_DBG_CASE
, "sending list.updated %s", cip
->ci_uuid
));
2175 nvl
= fmd_case_mkevent(cp
, FM_LIST_UPDATED_CLASS
);
2176 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
2177 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
2178 (void) pthread_rwlock_rdlock(&fmd
.d_log_lock
);
2179 fmd_log_append(fmd
.d_fltlog
, e
, cp
);
2180 (void) pthread_rwlock_unlock(&fmd
.d_log_lock
);
2181 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
2182 return; /* one or more suspects are still marked faulty */
2185 if (cstate
== FMD_CASE_CLOSED
)
2186 fmd_case_transition(cp
, FMD_CASE_REPAIRED
, FMD_CF_REPAIRED
);
2188 fmd_case_transition(cp
, FMD_CASE_CLOSE_WAIT
, FMD_CF_REPAIRED
);
2192 * Delete a closed case from the module's case list once the fmdo_close() entry
2193 * point has run to completion. If the case is owned by a transport module,
2194 * tell the transport to proxy a case close on the other end of the transport.
2195 * Transition to the appropriate next state based on ci_flags. This
2196 * function represents the end of CLOSE_WAIT and transitions the case to either
2197 * CLOSED or REPAIRED or discards it entirely because it was never solved;
2198 * refer to the topmost block comment explaining the state machine for details.
2201 fmd_case_delete(fmd_case_t
*cp
)
2203 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2207 TRACE((FMD_DBG_CASE
, "case delete %s", cip
->ci_uuid
));
2208 ASSERT(fmd_module_locked(cip
->ci_mod
));
2209 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
2210 buftotal
= fmd_buf_hash_destroy(&cip
->ci_bufs
);
2212 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
2213 msp
= cip
->ci_mod
->mod_stats
;
2215 ASSERT(msp
->ms_caseopen
.fmds_value
.ui64
!= 0);
2216 msp
->ms_caseopen
.fmds_value
.ui64
--;
2218 ASSERT(msp
->ms_buftotal
.fmds_value
.ui64
>= buftotal
);
2219 msp
->ms_buftotal
.fmds_value
.ui64
-= buftotal
;
2221 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
2223 if (cip
->ci_xprt
== NULL
)
2224 fmd_module_setcdirty(cip
->ci_mod
);
2226 fmd_module_rele(cip
->ci_mod
);
2227 cip
->ci_mod
= fmd
.d_rmod
;
2228 fmd_module_hold(cip
->ci_mod
);
2231 * If the case has been solved, then retain it
2232 * on the root module's case list at least until we're transitioned.
2233 * Otherwise free the case with our final fmd_case_rele() below.
2235 if (cip
->ci_flags
& FMD_CF_SOLVED
) {
2236 fmd_module_lock(cip
->ci_mod
);
2237 fmd_list_append(&cip
->ci_mod
->mod_cases
, cip
);
2238 fmd_module_unlock(cip
->ci_mod
);
2243 * Transition onwards to REPAIRED or CLOSED as originally requested.
2244 * Note that for proxy case if we're transitioning to CLOSED it means
2245 * the case was isolated locally, so call fmd_xprt_uuclose() to notify
2246 * the diagnosing side. No need to notify the diagnosing side if we are
2247 * transitioning to REPAIRED as we only do this when requested to do
2248 * so by the diagnosing side anyway.
2250 if (cip
->ci_flags
& FMD_CF_REPAIRED
)
2251 fmd_case_transition(cp
, FMD_CASE_REPAIRED
, 0);
2252 else if (cip
->ci_flags
& FMD_CF_ISOLATED
) {
2253 fmd_case_transition(cp
, FMD_CASE_CLOSED
, 0);
2254 if (cip
->ci_xprt
!= NULL
)
2255 fmd_xprt_uuclose(cip
->ci_xprt
, cip
->ci_uuid
);
2262 fmd_case_discard(fmd_case_t
*cp
, boolean_t delete_from_asru_cache
)
2264 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2266 (void) pthread_mutex_lock(&cip
->ci_mod
->mod_stats_lock
);
2267 cip
->ci_mod
->mod_stats
->ms_caseopen
.fmds_value
.ui64
--;
2268 (void) pthread_mutex_unlock(&cip
->ci_mod
->mod_stats_lock
);
2270 ASSERT(fmd_module_locked(cip
->ci_mod
));
2271 fmd_list_delete(&cip
->ci_mod
->mod_cases
, cip
);
2272 if (delete_from_asru_cache
) {
2273 (void) pthread_mutex_lock(&cip
->ci_lock
);
2274 fmd_asru_hash_delete_case(fmd
.d_asrus
, cp
);
2275 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2281 * Indicate that the problem corresponding to a case has been repaired by
2282 * clearing the faulty bit on each ASRU named as a suspect. If the case hasn't
2283 * already been closed, this function initiates the transition to CLOSE_WAIT.
2284 * The caller must have the case held from fmd_case_hash_lookup(), so we can
2285 * grab and drop ci_lock without the case being able to be freed in between.
2288 fmd_case_repair(fmd_case_t
*cp
)
2290 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2292 fmd_asru_rep_arg_t fara
;
2294 (void) pthread_mutex_lock(&cip
->ci_lock
);
2295 cstate
= cip
->ci_state
;
2297 if (cstate
< FMD_CASE_SOLVED
) {
2298 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2299 return (fmd_set_errno(EFMD_CASE_STATE
));
2302 if (cip
->ci_flags
& FMD_CF_REPAIRED
) {
2303 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2304 return (0); /* already repaired */
2307 TRACE((FMD_DBG_CASE
, "case repair %s", cip
->ci_uuid
));
2308 fara
.fara_reason
= FMD_ASRU_REPAIRED
;
2309 fara
.fara_bywhat
= FARA_BY_CASE
;
2310 fara
.fara_rval
= NULL
;
2311 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_asru_repaired
, &fara
);
2312 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2315 * if this is a proxied case, send the repair across the transport.
2316 * The remote side will then do the repair and send a list.repaired back
2317 * again such that we can finally repair the case on this side.
2319 if (cip
->ci_xprt
!= NULL
) {
2320 fmd_case_xprt_updated(cp
);
2324 if (cstate
== FMD_CASE_CLOSED
)
2325 fmd_case_transition(cp
, FMD_CASE_REPAIRED
, FMD_CF_REPAIRED
);
2327 fmd_case_transition(cp
, FMD_CASE_CLOSE_WAIT
, FMD_CF_REPAIRED
);
2333 fmd_case_acquit(fmd_case_t
*cp
)
2335 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2337 fmd_asru_rep_arg_t fara
;
2339 (void) pthread_mutex_lock(&cip
->ci_lock
);
2340 cstate
= cip
->ci_state
;
2342 if (cstate
< FMD_CASE_SOLVED
) {
2343 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2344 return (fmd_set_errno(EFMD_CASE_STATE
));
2347 if (cip
->ci_flags
& FMD_CF_REPAIRED
) {
2348 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2349 return (0); /* already repaired */
2352 TRACE((FMD_DBG_CASE
, "case acquit %s", cip
->ci_uuid
));
2353 fara
.fara_reason
= FMD_ASRU_ACQUITTED
;
2354 fara
.fara_bywhat
= FARA_BY_CASE
;
2355 fara
.fara_rval
= NULL
;
2356 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_asru_repaired
, &fara
);
2357 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2360 * if this is a proxied case, send the repair across the transport.
2361 * The remote side will then do the repair and send a list.repaired back
2362 * again such that we can finally repair the case on this side.
2364 if (cip
->ci_xprt
!= NULL
) {
2365 fmd_case_xprt_updated(cp
);
2369 if (cstate
== FMD_CASE_CLOSED
)
2370 fmd_case_transition(cp
, FMD_CASE_REPAIRED
, FMD_CF_REPAIRED
);
2372 fmd_case_transition(cp
, FMD_CASE_CLOSE_WAIT
, FMD_CF_REPAIRED
);
2378 fmd_case_contains(fmd_case_t
*cp
, fmd_event_t
*ep
)
2380 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2381 fmd_case_item_t
*cit
;
2385 (void) pthread_mutex_lock(&cip
->ci_lock
);
2387 if (cip
->ci_state
>= FMD_CASE_SOLVED
)
2388 state
= FMD_EVS_DIAGNOSED
;
2390 state
= FMD_EVS_ACCEPTED
;
2392 for (cit
= cip
->ci_items
; cit
!= NULL
; cit
= cit
->cit_next
) {
2393 if ((rv
= fmd_event_equal(ep
, cit
->cit_event
)) != 0)
2397 if (rv
== 0 && cip
->ci_principal
!= NULL
)
2398 rv
= fmd_event_equal(ep
, cip
->ci_principal
);
2400 (void) pthread_mutex_unlock(&cip
->ci_lock
);
2403 fmd_event_transition(ep
, state
);
2409 fmd_case_orphaned(fmd_case_t
*cp
)
2411 return (((fmd_case_impl_t
*)cp
)->ci_mod
== fmd
.d_rmod
);
2415 fmd_case_settime(fmd_case_t
*cp
, time_t tv_sec
, suseconds_t tv_usec
)
2417 ((fmd_case_impl_t
*)cp
)->ci_tv
.tv_sec
= tv_sec
;
2418 ((fmd_case_impl_t
*)cp
)->ci_tv
.tv_usec
= tv_usec
;
2419 ((fmd_case_impl_t
*)cp
)->ci_tv_valid
= 1;
2423 fmd_case_set_injected(fmd_case_t
*cp
)
2425 ((fmd_case_impl_t
*)cp
)->ci_injected
= 1;
2429 fmd_case_set_de_fmri(fmd_case_t
*cp
, nvlist_t
*nvl
)
2431 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2433 nvlist_free(cip
->ci_diag_de
);
2434 cip
->ci_diag_de
= nvl
;
2438 fmd_case_setcode(fmd_case_t
*cp
, char *code
)
2440 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2442 cip
->ci_code
= fmd_strdup(code
, FMD_SLEEP
);
2443 cip
->ci_codelen
= cip
->ci_code
? strlen(cip
->ci_code
) + 1 : 0;
2448 fmd_case_repair_replay_case(fmd_case_t
*cp
, void *arg
)
2455 int any_unusable_and_present
= 0;
2456 fmd_case_impl_t
*cip
= (fmd_case_impl_t
*)cp
;
2458 if (cip
->ci_state
< FMD_CASE_SOLVED
|| cip
->ci_xprt
!= NULL
)
2461 if (cip
->ci_state
== FMD_CASE_RESOLVED
) {
2462 cip
->ci_flags
|= FMD_CF_RES_CMPL
;
2466 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_faulty
, &faulty
);
2467 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
, fmd_case_not_faulty
,
2470 if (cip
->ci_state
>= FMD_CASE_REPAIRED
&& !faulty
) {
2472 * If none of the suspects is faulty, replay the list.repaired.
2473 * If all suspects are already either usable or not present then
2474 * also transition straight to RESOLVED state.
2476 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
2477 fmd_case_unusable_and_present
, &any_unusable_and_present
);
2478 if (!any_unusable_and_present
) {
2479 cip
->ci_state
= FMD_CASE_RESOLVED
;
2481 TRACE((FMD_DBG_CASE
, "replay sending list.repaired %s",
2483 nvl
= fmd_case_mkevent(cp
, FM_LIST_REPAIRED_CLASS
);
2484 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
2485 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
,
2487 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
2489 TRACE((FMD_DBG_CASE
, "replay sending list.resolved %s",
2491 fmd_case_publish(cp
, FMD_CASE_RESOLVED
);
2492 fmd_asru_hash_apply_by_case(fmd
.d_asrus
, cp
,
2493 fmd_asru_log_resolved
, NULL
);
2494 cip
->ci_flags
|= FMD_CF_RES_CMPL
;
2496 TRACE((FMD_DBG_CASE
, "replay sending list.repaired %s",
2498 nvl
= fmd_case_mkevent(cp
, FM_LIST_REPAIRED_CLASS
);
2499 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
2500 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
,
2502 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
2504 } else if (faulty
&& not_faulty
) {
2506 * if some but not all of the suspects are not faulty, replay
2509 TRACE((FMD_DBG_CASE
, "replay sending list.updated %s",
2511 nvl
= fmd_case_mkevent(cp
, FM_LIST_UPDATED_CLASS
);
2512 (void) nvlist_lookup_string(nvl
, FM_CLASS
, &class);
2513 e
= fmd_event_create(FMD_EVT_PROTOCOL
, FMD_HRT_NOW
, nvl
, class);
2514 fmd_dispq_dispatch(fmd
.d_disp
, e
, class);
2519 fmd_case_repair_replay()
2521 fmd_case_hash_apply(fmd
.d_cases
, fmd_case_repair_replay_case
, NULL
);