4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/types.h>
35 #include <fm/fmd_log.h>
36 #include <sys/fm/protocol.h>
37 #include <fm/libtopo.h>
38 #include <fm/fmd_adm.h>
39 #include <fm/fmd_msg.h>
41 #include <sys/systeminfo.h>
42 #include <sys/utsname.h>
45 #include <sys/smbios.h>
46 #include <libdevinfo.h>
51 * Fault records are added to catalog by calling add_fault_record_to_catalog()
52 * records are stored in order of importance to the system.
53 * If -g flag is set or not_suppressed is not set and the class fru, fault,
54 * type are the same then details are merged into an existing record, with uuid
55 * records are stored in time order.
56 * For each record information is extracted from nvlist and merged into linked
57 * list each is checked for identical records for which percentage certainty are
59 * print_catalog() is called to print out catalog and release external resources
62 * status_rec_list -> | | -|
65 * /---------------\ /-------\ /-------\
66 * status_fru_list | status_record | -> | uurec | -> | uurec | -|
67 * \/ | | |- | | <- | |
68 * /-------------\ | | \-------/ \-------/
70 * \-------------/ | | /-------\ /-------\
71 * \/ | | -> | asru | -> | asru |
73 * | | \-------/ \-------/
74 * status_asru_list | class |
75 * \/ | resource | /-------\ /-------\
76 * /-------------\ | fru | -> | list | -> | list |
77 * | | -> | serial | | | <- | |
78 * \-------------/ | | \-------/ \-------/
79 * \/ \---------------/
85 * Fmadm faulty takes a number of options which affect the format of the
86 * output displayed. By default, the display reports the FRU and ASRU along
87 * with other information on per-case basis as in the example below.
89 * --------------- ------------------------------------ -------------- -------
90 * TIME EVENT-ID MSG-ID SEVERITY
91 * --------------- ------------------------------------ -------------- -------
92 * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c AMD-8000-2F Major
94 * Fault class : fault.memory.dimm_sb
95 * Affects : mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
96 * faulted but still in service
97 * FRU : "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
100 * Description : The number of errors associated with this memory module has
101 * exceeded acceptable levels. Refer to
102 * http://illumos.org/msg/AMD-8000-2F for more information.
104 * Response : Pages of memory associated with this memory module are being
105 * removed from service as errors are reported.
107 * Impact : Total system memory capacity will be reduced as pages are
110 * Action : Schedule a repair procedure to replace the affected memory
111 * module. Use fmdump -v -u <EVENT_ID> to identify the module.
113 * The -v flag is similar, but adds some additional information such as the
114 * resource. The -s flag is also similar but just gives the top line summary.
115 * All these options (i.e. without the -f or -r flags) use the print_catalog()
116 * function to do the display.
118 * The -f flag changes the output so that it appears sorted on a per-fru basis.
119 * The output is somewhat cut down compared to the default output. If -f is
120 * used, then print_fru() is used to print the output.
122 * -----------------------------------------------------------------------------
123 * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
124 * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
126 * Description : A problem was detected for a PCI device.
127 * Refer to http://illumos.org/msg/PCI-8000-7J
128 * for more information.
130 * Response : One or more device instances may be disabled
132 * Impact : Possible loss of services provided by the device instances
133 * associated with this fault
135 * Action : Schedule a repair procedure to replace the affected device.
136 * Use fmdump -v -u <EVENT_ID> to identify the device or contact
139 * The -r flag changes the output so that it appears sorted on a per-asru basis.
140 * The output is very much cut down compared to the default output, just giving
141 * the asru fmri and state. Here print_asru() is used to print the output.
143 * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0 degraded
145 * For all fmadm faulty options, the sequence of events is
147 * 1) Walk through all the cases in the system using fmd_adm_case_iter() and
148 * for each case call dfault_rec(). This will call add_fault_record_to_catalog()
149 * This will extract the data from the nvlist and call catalog_new_record() to
150 * save the data away in various linked lists in the catalogue.
152 * 2) Once this is done, the data can be supplemented by using
153 * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option.
155 * 3) Finally print_catalog(), print_fru() or print_asru() are called as
156 * appropriate to display the information from the catalogue sorted in the
161 typedef struct name_list
{
162 struct name_list
*next
;
163 struct name_list
*prev
;
172 typedef struct ari_list
{
174 struct ari_list
*next
;
177 typedef struct uurec
{
181 ari_list_t
*ari_uuid_list
;
187 typedef struct uurec_select
{
188 struct uurec_select
*next
;
192 typedef struct host_id
{
200 typedef struct host_id_list
{
202 struct host_id_list
*next
;
205 typedef struct status_record
{
209 char *severity
; /* in C locale */
212 name_list_t
*resource
;
216 uint8_t not_suppressed
;
220 typedef struct sr_list
{
221 struct sr_list
*next
;
222 struct sr_list
*prev
;
223 struct status_record
*status_record
;
226 typedef struct resource_list
{
227 struct resource_list
*next
;
228 struct resource_list
*prev
;
229 sr_list_t
*status_rec_list
;
231 uint8_t not_suppressed
;
236 sr_list_t
*status_rec_list
;
237 resource_list_t
*status_fru_list
;
238 resource_list_t
*status_asru_list
;
240 static int max_display
;
241 static int max_fault
= 0;
242 static topo_hdl_t
*topo_handle
;
243 static host_id_list_t
*host_list
;
246 static fmd_msg_hdl_t
*fmadm_msghdl
= NULL
; /* handle for libfmd_msg calls */
249 format_date(char *buf
, size_t len
, uint64_t sec
)
251 if (sec
> LONG_MAX
) {
252 (void) fprintf(stderr
,
253 "record time is too large for 32-bit utility\n");
254 (void) snprintf(buf
, len
, "0x%llx", sec
);
256 time_t tod
= (time_t)sec
;
257 time_t now
= time(NULL
);
259 tod
< now
- 6L*30L*24L*60L*60L) { /* 6 months ago */
260 (void) strftime(buf
, len
, "%b %d %Y ",
263 (void) strftime(buf
, len
, "%b %d %T", localtime(&tod
));
271 find_hostid_in_list(char *platform
, char *chassis
, char *server
, char *domain
,
275 host_id_list_t
*hostp
;
277 if (platform
== NULL
)
283 if (hostp
->hostid
.platform
&&
284 strcmp(hostp
->hostid
.platform
, platform
) == 0 &&
285 hostp
->hostid
.server
&&
286 strcmp(hostp
->hostid
.server
, server
) == 0 &&
287 (chassis
== NULL
|| hostp
->hostid
.chassis
== NULL
||
288 strcmp(chassis
, hostp
->hostid
.chassis
) == 0) &&
289 (product_sn
== NULL
|| hostp
->hostid
.product_sn
== NULL
||
290 strcmp(product_sn
, hostp
->hostid
.product_sn
) == 0) &&
291 (domain
== NULL
|| hostp
->hostid
.domain
== NULL
||
292 strcmp(domain
, hostp
->hostid
.domain
) == 0)) {
299 hostp
= malloc(sizeof (host_id_list_t
));
300 hostp
->hostid
.platform
= strdup(platform
);
301 hostp
->hostid
.product_sn
=
302 product_sn
? strdup(product_sn
) : NULL
;
303 hostp
->hostid
.server
= strdup(server
);
304 hostp
->hostid
.chassis
= chassis
? strdup(chassis
) : NULL
;
305 hostp
->hostid
.domain
= domain
? strdup(domain
) : NULL
;
306 hostp
->next
= host_list
;
315 find_hostid(nvlist_t
*nvl
)
317 char *platform
= NULL
, *chassis
= NULL
, *server
= NULL
, *domain
= NULL
;
318 char *product_sn
= NULL
;
319 nvlist_t
*auth
, *fmri
;
322 if (nvlist_lookup_nvlist(nvl
, FM_SUSPECT_DE
, &fmri
) == 0 &&
323 nvlist_lookup_nvlist(fmri
, FM_FMRI_AUTHORITY
, &auth
) == 0) {
324 (void) nvlist_lookup_string(auth
, FM_FMRI_AUTH_PRODUCT
,
326 (void) nvlist_lookup_string(auth
, FM_FMRI_AUTH_PRODUCT_SN
,
328 (void) nvlist_lookup_string(auth
, FM_FMRI_AUTH_SERVER
, &server
);
329 (void) nvlist_lookup_string(auth
, FM_FMRI_AUTH_CHASSIS
,
331 (void) nvlist_lookup_string(auth
, FM_FMRI_AUTH_DOMAIN
, &domain
);
332 rt
= find_hostid_in_list(platform
, chassis
, server
,
339 get_nvl2str_topo(nvlist_t
*nvl
)
345 char *mod_name
= NULL
;
348 if (topo_handle
== NULL
)
349 topo_handle
= topo_open(TOPO_VERSION
, 0, &err
);
350 if (topo_fmri_nvl2str(topo_handle
, nvl
, &tname
, &err
) == 0) {
351 name
= strdup(tname
);
352 topo_hdl_strfree(topo_handle
, tname
);
354 (void) nvlist_lookup_string(nvl
, FM_FMRI_SCHEME
, &scheme
);
355 (void) nvlist_lookup_string(nvl
, FM_FMRI_MOD_NAME
, &mod_name
);
356 if (scheme
&& strcmp(scheme
, FM_FMRI_SCHEME_FMD
) == 0 &&
358 (void) snprintf(buf
, sizeof (buf
), "%s:///module/%s",
367 set_priority(char *s
)
372 if (strcmp(s
, "Minor") == 0)
374 else if (strcmp(s
, "Major") == 0)
376 else if (strcmp(s
, "Critical") == 0)
383 cmp_priority(char *s1
, char *s2
, uint64_t t1
, uint64_t t2
, uint8_t p1
,
389 r1
= set_priority(s1
);
390 r2
= set_priority(s2
);
404 * merge two lists into one, by comparing entries in new and moving into list if
405 * name is not there or free off memory for names which are already there
406 * add_pct indicates if pct is the sum or highest pct
409 merge_name_list(name_list_t
**list
, name_list_t
*new, int add_pct
)
411 name_list_t
*lp
, *np
, *sp
, *rt
= NULL
;
419 if (strcmp(lp
->name
, np
->name
) == 0)
430 lp
->status
|= (np
->status
& FM_SUSPECT_FAULTY
);
433 lp
->count
+= np
->count
;
434 } else if (np
->pct
> lp
->pct
) {
437 max_pct
= np
->max_pct
;
442 if (max_pct
> lp
->max_pct
) {
443 lp
->max_pct
= max_pct
;
444 if (lp
->max_pct
> lp
->prev
->max_pct
&&
446 lp
->prev
->next
= lp
->next
;
447 lp
->next
->prev
= lp
->prev
;
455 if (np
->max_pct
> lp
->max_pct
) {
464 while (lp
!= *list
&&
465 np
->max_pct
< lp
->max_pct
) {
486 alloc_name_list(char *name
, uint8_t pct
)
490 nlp
= malloc(sizeof (*nlp
));
491 nlp
->name
= strdup(name
);
502 static status_record_t
*
503 new_record_init(uurec_t
*uurec_p
, char *msgid
, name_list_t
*class,
504 name_list_t
*fru
, name_list_t
*asru
, name_list_t
*resource
,
505 name_list_t
*serial
, boolean_t not_suppressed
,
506 hostid_t
*hostid
, boolean_t injected
)
508 status_record_t
*status_rec_p
;
510 status_rec_p
= (status_record_t
*)malloc(sizeof (status_record_t
));
511 status_rec_p
->nrecs
= 1;
512 status_rec_p
->host
= hostid
;
513 status_rec_p
->uurec
= uurec_p
;
514 uurec_p
->next
= NULL
;
515 uurec_p
->prev
= NULL
;
516 uurec_p
->asru
= asru
;
517 if ((status_rec_p
->severity
= fmd_msg_getitem_id(fmadm_msghdl
, NULL
,
518 msgid
, FMD_MSG_ITEM_SEVERITY
)) == NULL
)
519 status_rec_p
->severity
= strdup("unknown");
520 status_rec_p
->class = class;
521 status_rec_p
->fru
= fru
;
522 status_rec_p
->asru
= asru
;
523 status_rec_p
->resource
= resource
;
524 status_rec_p
->serial
= serial
;
525 status_rec_p
->msgid
= strdup(msgid
);
526 status_rec_p
->not_suppressed
= not_suppressed
;
527 status_rec_p
->injected
= injected
;
528 return (status_rec_p
);
532 * add record to given list maintaining order higher priority first.
535 add_rec_list(status_record_t
*status_rec_p
, sr_list_t
**list_pp
)
537 sr_list_t
*tp
, *np
, *sp
;
541 np
= malloc(sizeof (sr_list_t
));
542 np
->status_record
= status_rec_p
;
543 sec
= status_rec_p
->uurec
->sec
;
544 if ((sp
= *list_pp
) == NULL
) {
549 /* insert new record in front of lower priority */
551 order
= cmp_priority(status_rec_p
->severity
,
552 sp
->status_record
->severity
, sec
,
553 tp
->status_record
->uurec
->sec
, 0, 0);
559 cmp_priority(status_rec_p
->severity
,
560 tp
->status_record
->severity
, sec
,
561 tp
->status_record
->uurec
->sec
, 0, 0)) {
573 add_resource(status_record_t
*status_rec_p
, resource_list_t
**rp
,
578 resource_list_t
*sp
, *tp
;
579 status_record_t
*srp
;
580 char *severity
= status_rec_p
->severity
;
582 add_rec_list(status_rec_p
, &np
->status_rec_list
);
583 if ((sp
= *rp
) == NULL
) {
589 * insert new record in front of lower priority
592 srp
= sp
->status_rec_list
->status_record
;
593 sec
= status_rec_p
->uurec
->sec
;
594 order
= cmp_priority(severity
, srp
->severity
, sec
,
595 srp
->uurec
->sec
, np
->max_pct
, sp
->max_pct
);
599 srp
= tp
->status_rec_list
->status_record
;
601 cmp_priority(severity
, srp
->severity
, sec
,
602 srp
->uurec
->sec
, np
->max_pct
, sp
->max_pct
) < 0) {
604 srp
= tp
->status_rec_list
->status_record
;
615 add_resource_list(status_record_t
*status_rec_p
, name_list_t
*fp
,
616 resource_list_t
**rpp
)
619 resource_list_t
*np
, *end
;
620 status_record_t
*srp
;
625 if (strcmp(fp
->name
, np
->resource
) == 0) {
626 np
->not_suppressed
|= status_rec_p
->not_suppressed
;
627 np
->injected
|= status_rec_p
->injected
;
628 srp
= np
->status_rec_list
->status_record
;
629 order
= cmp_priority(status_rec_p
->severity
,
630 srp
->severity
, status_rec_p
->uurec
->sec
,
631 srp
->uurec
->sec
, fp
->max_pct
, np
->max_pct
);
632 if (order
> 0 && np
!= end
) {
634 * remove from list and add again using
637 np
->prev
->next
= np
->next
;
638 np
->next
->prev
= np
->prev
;
639 add_resource(status_rec_p
,
642 add_rec_list(status_rec_p
,
643 &np
->status_rec_list
);
654 np
= malloc(sizeof (resource_list_t
));
655 np
->resource
= fp
->name
;
656 np
->not_suppressed
= status_rec_p
->not_suppressed
;
657 np
->injected
= status_rec_p
->injected
;
658 np
->status_rec_list
= NULL
;
659 np
->max_pct
= fp
->max_pct
;
660 add_resource(status_rec_p
, rpp
, np
);
665 add_list(status_record_t
*status_rec_p
, name_list_t
*listp
,
666 resource_list_t
**glistp
)
668 name_list_t
*fp
, *end
;
673 add_resource_list(status_rec_p
, fp
, glistp
);
681 * add record to rec, fru and asru lists.
684 catalog_new_record(uurec_t
*uurec_p
, char *msgid
, name_list_t
*class,
685 name_list_t
*fru
, name_list_t
*asru
, name_list_t
*resource
,
686 name_list_t
*serial
, boolean_t not_suppressed
,
687 hostid_t
*hostid
, boolean_t injected
, boolean_t dummy_fru
)
689 status_record_t
*status_rec_p
;
691 status_rec_p
= new_record_init(uurec_p
, msgid
, class, fru
, asru
,
692 resource
, serial
, not_suppressed
, hostid
, injected
);
693 add_rec_list(status_rec_p
, &status_rec_list
);
694 if (status_rec_p
->fru
&& !dummy_fru
)
695 add_list(status_rec_p
, status_rec_p
->fru
, &status_fru_list
);
696 if (status_rec_p
->asru
)
697 add_list(status_rec_p
, status_rec_p
->asru
, &status_asru_list
);
701 get_serial_no(nvlist_t
*nvl
, name_list_t
**serial_p
, uint8_t pct
)
705 char **lserial
= NULL
;
712 if (nvlist_lookup_string(nvl
, FM_FMRI_SCHEME
, &name
) == 0) {
713 if (strcmp(name
, FM_FMRI_SCHEME_CPU
) == 0) {
714 if (nvlist_lookup_uint64(nvl
, FM_FMRI_CPU_SERIAL_ID
,
716 (void) snprintf(buf
, sizeof (buf
), "%llX",
718 nlp
= alloc_name_list(buf
, pct
);
719 (void) merge_name_list(serial_p
, nlp
, 1);
721 } else if (strcmp(name
, FM_FMRI_SCHEME_MEM
) == 0) {
722 if (nvlist_lookup_string_array(nvl
,
723 FM_FMRI_MEM_SERIAL_ID
, &lserial
, &nelem
) == 0) {
724 nlp
= alloc_name_list(lserial
[0], pct
);
725 for (j
= 1; j
< nelem
; j
++) {
727 n1lp
= alloc_name_list(lserial
[j
], pct
);
728 (void) merge_name_list(&nlp
, n1lp
, 1);
730 (void) merge_name_list(serial_p
, nlp
, 1);
732 } else if (strcmp(name
, FM_FMRI_SCHEME_HC
) == 0) {
733 if (nvlist_lookup_string(nvl
, FM_FMRI_HC_SERIAL_ID
,
735 nlp
= alloc_name_list(serial
, pct
);
736 (void) merge_name_list(serial_p
, nlp
, 1);
743 extract_record_info(nvlist_t
*nvl
, name_list_t
**class_p
,
744 name_list_t
**fru_p
, name_list_t
**serial_p
, name_list_t
**resource_p
,
745 name_list_t
**asru_p
, boolean_t
*dummy_fru
, uint8_t status
)
747 nvlist_t
*lfru
, *lasru
, *rsrc
;
754 (void) nvlist_lookup_uint8(nvl
, FM_FAULT_CERTAINTY
, &lpct
);
755 if (nvlist_lookup_string(nvl
, FM_CLASS
, &lclass
) == 0) {
756 nlp
= alloc_name_list(lclass
, lpct
);
757 (void) merge_name_list(class_p
, nlp
, 1);
759 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_FRU
, &lfru
) == 0) {
760 name
= get_nvl2str_topo(lfru
);
762 nlp
= alloc_name_list(name
, lpct
);
763 nlp
->status
= status
& ~(FM_SUSPECT_UNUSABLE
|
764 FM_SUSPECT_DEGRADED
);
766 if (nvlist_lookup_string(nvl
, FM_FAULT_LOCATION
,
768 nlp
->label
= strdup(label
);
769 (void) merge_name_list(fru_p
, nlp
, 1);
771 get_serial_no(lfru
, serial_p
, lpct
);
772 } else if (nvlist_lookup_nvlist(nvl
, FM_FAULT_RESOURCE
, &rsrc
) != 0) {
774 * No FRU or resource. But we want to display the repair status
775 * somehow, so create a dummy FRU field.
778 nlp
= alloc_name_list(dgettext("FMD", "None"), lpct
);
779 nlp
->status
= status
& ~(FM_SUSPECT_UNUSABLE
|
780 FM_SUSPECT_DEGRADED
);
781 (void) merge_name_list(fru_p
, nlp
, 1);
783 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_ASRU
, &lasru
) == 0) {
784 name
= get_nvl2str_topo(lasru
);
786 nlp
= alloc_name_list(name
, lpct
);
787 nlp
->status
= status
& ~(FM_SUSPECT_NOT_PRESENT
|
788 FM_SUSPECT_REPAIRED
| FM_SUSPECT_REPLACED
|
789 FM_SUSPECT_ACQUITTED
);
791 (void) merge_name_list(asru_p
, nlp
, 1);
793 get_serial_no(lasru
, serial_p
, lpct
);
795 if (nvlist_lookup_nvlist(nvl
, FM_FAULT_RESOURCE
, &rsrc
) == 0) {
796 name
= get_nvl2str_topo(rsrc
);
798 nlp
= alloc_name_list(name
, lpct
);
799 nlp
->status
= status
;
801 if (nvlist_lookup_string(nvl
, FM_FAULT_LOCATION
,
803 nlp
->label
= strdup(label
);
804 (void) merge_name_list(resource_p
, nlp
, 1);
810 add_fault_record_to_catalog(nvlist_t
*nvl
, uint64_t sec
, char *uuid
)
814 name_list_t
*class = NULL
, *resource
= NULL
;
815 name_list_t
*asru
= NULL
, *fru
= NULL
, *serial
= NULL
;
820 boolean_t not_suppressed
= 1;
821 boolean_t any_present
= 0;
822 boolean_t injected
= 0;
823 boolean_t dummy_fru
= 0;
825 (void) nvlist_lookup_string(nvl
, FM_SUSPECT_DIAG_CODE
, &msgid
);
826 (void) nvlist_lookup_uint32(nvl
, FM_SUSPECT_FAULT_SZ
, &size
);
827 (void) nvlist_lookup_boolean_value(nvl
, FM_SUSPECT_MESSAGE
,
829 (void) nvlist_lookup_boolean_value(nvl
, FM_SUSPECT_INJECTED
, &injected
);
832 (void) nvlist_lookup_nvlist_array(nvl
, FM_SUSPECT_FAULT_LIST
,
834 (void) nvlist_lookup_uint8_array(nvl
, FM_SUSPECT_FAULT_STATUS
,
836 for (i
= 0; i
< size
; i
++) {
837 extract_record_info(nva
[i
], &class, &fru
, &serial
,
838 &resource
, &asru
, &dummy_fru
, ba
[i
]);
839 if (!(ba
[i
] & FM_SUSPECT_NOT_PRESENT
) &&
840 (ba
[i
] & FM_SUSPECT_FAULTY
))
844 * also suppress if no resources present
846 if (any_present
== 0)
850 uurec_p
= (uurec_t
*)malloc(sizeof (uurec_t
));
851 uurec_p
->uuid
= strdup(uuid
);
853 uurec_p
->ari_uuid_list
= NULL
;
854 uurec_p
->event
= NULL
;
855 (void) nvlist_dup(nvl
, &uurec_p
->event
, 0);
856 host
= find_hostid(nvl
);
857 catalog_new_record(uurec_p
, msgid
, class, fru
, asru
,
858 resource
, serial
, not_suppressed
, host
, injected
, dummy_fru
);
862 update_asru_state_in_catalog(const char *uuid
, const char *ari_uuid
)
866 ari_list_t
*ari_list
;
868 srp
= status_rec_list
;
871 uurp
= srp
->status_record
->uurec
;
873 if (strcmp(uuid
, uurp
->uuid
) == 0) {
874 ari_list
= (ari_list_t
*)
875 malloc(sizeof (ari_list_t
));
876 ari_list
->ari_uuid
= strdup(ari_uuid
);
877 ari_list
->next
= uurp
->ari_uuid_list
;
878 uurp
->ari_uuid_list
= ari_list
;
883 if (srp
->next
== status_rec_list
)
891 print_line(char *label
, char *buf
)
900 padding
= malloc(lsz
+ 1);
901 for (i
= 0; i
< lsz
; i
++)
911 while ((c
= *ep
) != '\0' && (wp
== NULL
|| i
< 80)) {
914 else if (c
== '\n') {
919 } while ((c
= *ep
) != '\0' && c
== ' ');
930 (void) printf("%s%s\n", label
, cp
);
938 print_dict_info_line(nvlist_t
*e
, fmd_msg_item_t what
, const char *linehdr
)
940 char *cp
= fmd_msg_getitem_nv(fmadm_msghdl
, NULL
, e
, what
);
943 print_line(dgettext("FMD", linehdr
), cp
);
949 print_dict_info(nvlist_t
*nvl
)
951 print_dict_info_line(nvl
, FMD_MSG_ITEM_DESC
, "Description : ");
952 print_dict_info_line(nvl
, FMD_MSG_ITEM_RESPONSE
, "Response : ");
953 print_dict_info_line(nvl
, FMD_MSG_ITEM_IMPACT
, "Impact : ");
954 print_dict_info_line(nvl
, FMD_MSG_ITEM_ACTION
, "Action : ");
958 print_name(name_list_t
*list
, char *padding
, int *np
, int pct
, int full
)
964 (void) printf("%s \"%s\" (%s)", padding
, list
->label
, name
);
967 (void) printf("%s %s", padding
, name
);
970 if (list
->pct
&& pct
> 0 && pct
< 100) {
971 if (list
->count
> 1) {
973 (void) printf(" %d @ %s %d%%\n", list
->count
,
974 dgettext("FMD", "max"),
977 (void) printf(" %s %d%%\n",
978 dgettext("FMD", "max"),
982 (void) printf(" %d%%\n", list
->pct
);
990 print_asru_status(int status
, char *label
)
996 msg
= dgettext("FMD", "ok and in service");
998 case FM_SUSPECT_DEGRADED
:
999 msg
= dgettext("FMD", "service degraded, "
1000 "but associated components no longer faulty");
1002 case FM_SUSPECT_FAULTY
| FM_SUSPECT_DEGRADED
:
1003 msg
= dgettext("FMD", "faulted but still "
1004 "providing degraded service");
1006 case FM_SUSPECT_FAULTY
:
1007 msg
= dgettext("FMD", "faulted but still in service");
1009 case FM_SUSPECT_UNUSABLE
:
1010 msg
= dgettext("FMD", "out of service, "
1011 "but associated components no longer faulty");
1013 case FM_SUSPECT_FAULTY
| FM_SUSPECT_UNUSABLE
:
1014 msg
= dgettext("FMD", "faulted and taken out of service");
1020 (void) printf("%s %s\n", label
, msg
);
1025 print_fru_status(int status
, char *label
)
1029 if (status
& FM_SUSPECT_NOT_PRESENT
)
1030 msg
= dgettext("FMD", "not present");
1031 else if (status
& FM_SUSPECT_FAULTY
)
1032 msg
= dgettext("FMD", "faulty");
1033 else if (status
& FM_SUSPECT_REPLACED
)
1034 msg
= dgettext("FMD", "replaced");
1035 else if (status
& FM_SUSPECT_REPAIRED
)
1036 msg
= dgettext("FMD", "repair attempted");
1037 else if (status
& FM_SUSPECT_ACQUITTED
)
1038 msg
= dgettext("FMD", "acquitted");
1040 msg
= dgettext("FMD", "removed");
1041 (void) printf("%s %s\n", label
, msg
);
1045 print_rsrc_status(int status
, char *label
)
1049 if (status
& FM_SUSPECT_NOT_PRESENT
)
1050 msg
= dgettext("FMD", "not present");
1051 else if (status
& FM_SUSPECT_FAULTY
) {
1052 if (status
& FM_SUSPECT_DEGRADED
)
1053 msg
= dgettext("FMD",
1054 "faulted but still providing degraded service");
1055 else if (status
& FM_SUSPECT_UNUSABLE
)
1056 msg
= dgettext("FMD",
1057 "faulted and taken out of service");
1059 msg
= dgettext("FMD", "faulted but still in service");
1060 } else if (status
& FM_SUSPECT_REPLACED
)
1061 msg
= dgettext("FMD", "replaced");
1062 else if (status
& FM_SUSPECT_REPAIRED
)
1063 msg
= dgettext("FMD", "repair attempted");
1064 else if (status
& FM_SUSPECT_ACQUITTED
)
1065 msg
= dgettext("FMD", "acquitted");
1067 msg
= dgettext("FMD", "removed");
1068 (void) printf("%s %s\n", label
, msg
);
1072 print_name_list(name_list_t
*list
, char *label
,
1073 int limit
, int pct
, void (func1
)(int, char *), int full
)
1078 name_list_t
*end
= list
;
1081 padding
= malloc(l
+ 1);
1082 for (i
= 0; i
< l
; i
++)
1085 (void) printf("%s", label
);
1088 (void) printf(" \"%s\" (%s)", list
->label
, name
);
1090 (void) printf(" %s", name
);
1091 if (list
->pct
&& pct
> 0 && pct
< 100) {
1092 if (list
->count
> 1) {
1094 (void) printf(" %d @ %s %d%%\n", list
->count
,
1095 dgettext("FMD", "max"), list
->max_pct
);
1097 (void) printf(" %s %d%%\n",
1098 dgettext("FMD", "max"), list
->max_pct
);
1101 (void) printf(" %d%%\n", list
->pct
);
1104 (void) printf("\n");
1107 func1(list
->status
, padding
);
1110 while ((list
= list
->next
) != end
) {
1111 if (limit
== 0 || n
< limit
) {
1112 print_name(list
, padding
, &n
, pct
, full
);
1114 func1(list
->status
, padding
);
1119 print_name(list
->prev
, padding
, &n
, pct
, full
);
1121 (void) printf("%s... %d %s\n", padding
, j
,
1122 dgettext("FMD", "more entries suppressed,"
1123 " use -v option for full list"));
1129 asru_same_status(name_list_t
*list
)
1131 name_list_t
*end
= list
;
1132 int status
= list
->status
;
1134 while ((list
= list
->next
) != end
) {
1136 status
= list
->status
;
1139 if (list
->status
!= -1 && status
!= list
->status
) {
1148 serial_in_fru(name_list_t
*fru
, name_list_t
*serial
)
1150 name_list_t
*sp
= serial
;
1159 (void) snprintf(buf
, sizeof (buf
), "serial=%s", sp
->name
);
1160 buf
[sizeof (buf
) - 1] = 0;
1162 if (strstr(fp
->name
, buf
) != NULL
) {
1174 return (found
== nserial
? 1 : 0);
1178 print_sup_record(status_record_t
*srp
, int opt_i
, int full
)
1181 uurec_t
*uurp
= srp
->uurec
;
1184 ari_list_t
*ari_list
;
1193 k
= srp
->nrecs
- max
;
1194 while ((uurp
= uurp
->next
) != NULL
) {
1195 if (full
|| n
< j
|| n
>= k
|| max_fault
== 0 ||
1196 srp
->nrecs
== max_fault
+1) {
1198 ari_list
= uurp
->ari_uuid_list
;
1200 (void) printf("%-15s %s\n",
1201 format_date(buf
, sizeof (buf
),
1202 uurp
->sec
), ari_list
->ari_uuid
);
1203 ari_list
= ari_list
->next
;
1206 (void) printf("%-15s %s\n",
1207 format_date(buf
, sizeof (buf
), uurp
->sec
),
1211 (void) printf("... %d %s\n", srp
->nrecs
- max_fault
,
1212 dgettext("FMD", "more entries suppressed"));
1215 (void) printf("\n");
1216 (void) printf("%s %s", dgettext("FMD", "Host :"),
1218 if (srp
->host
->domain
)
1219 (void) printf("\t%s %s", dgettext("FMD", "Domain :"),
1221 (void) printf("\n%s %s", dgettext("FMD", "Platform :"),
1222 srp
->host
->platform
);
1223 (void) printf("\t%s %s", dgettext("FMD", "Chassis_id :"),
1224 srp
->host
->chassis
? srp
->host
->chassis
: "");
1225 (void) printf("\n%s %s\n\n", dgettext("FMD", "Product_sn :"),
1226 srp
->host
->product_sn
? srp
->host
->product_sn
: "");
1228 print_name_list(srp
->class,
1229 dgettext("FMD", "Fault class :"), 0, srp
->class->pct
,
1232 status
= asru_same_status(srp
->asru
);
1234 print_name_list(srp
->asru
,
1235 dgettext("FMD", "Affects :"),
1236 full
? 0 : max_display
, 0, NULL
, full
);
1237 print_asru_status(status
, " ");
1239 print_name_list(srp
->asru
,
1240 dgettext("FMD", "Affects :"),
1241 full
? 0 : max_display
, 0, print_asru_status
, full
);
1243 if (full
|| srp
->fru
== NULL
|| srp
->asru
== NULL
) {
1244 if (srp
->resource
) {
1245 status
= asru_same_status(srp
->resource
);
1247 print_name_list(srp
->resource
,
1248 dgettext("FMD", "Problem in :"),
1249 full
? 0 : max_display
, 0, NULL
, full
);
1250 print_rsrc_status(status
, " ");
1252 print_name_list(srp
->resource
,
1253 dgettext("FMD", "Problem in :"),
1254 full
? 0 : max_display
, 0,
1255 print_rsrc_status
, full
);
1259 status
= asru_same_status(srp
->fru
);
1261 print_name_list(srp
->fru
, dgettext("FMD",
1263 srp
->fru
->pct
== 100 ? 100 : srp
->fru
->max_pct
,
1265 print_fru_status(status
, " ");
1267 print_name_list(srp
->fru
, dgettext("FMD",
1269 srp
->fru
->pct
== 100 ? 100 : srp
->fru
->max_pct
,
1270 print_fru_status
, full
);
1272 if (srp
->serial
&& !serial_in_fru(srp
->fru
, srp
->serial
) &&
1273 !serial_in_fru(srp
->asru
, srp
->serial
)) {
1274 print_name_list(srp
->serial
, dgettext("FMD", "Serial ID. :"),
1277 print_dict_info(srp
->uurec
->event
);
1278 (void) printf("\n");
1282 print_status_record(status_record_t
*srp
, int summary
, int opt_i
, int full
)
1285 uurec_t
*uurp
= srp
->uurec
;
1286 static int header
= 0;
1288 ari_list_t
*ari_list
;
1290 if (!summary
|| !header
) {
1292 head
= "--------------- "
1293 "------------------------------------ "
1294 "-------------- ---------\n"
1297 " SEVERITY\n--------------- "
1298 "------------------------------------ "
1299 " -------------- ---------";
1301 head
= "--------------- "
1302 "------------------------------------ "
1303 "-------------- ---------\n"
1306 " SEVERITY\n--------------- "
1307 "------------------------------------ "
1308 " -------------- ---------";
1310 (void) printf("%s\n", dgettext("FMD", head
));
1314 ari_list
= uurp
->ari_uuid_list
;
1316 (void) printf("%-15s %-37s %-14s %-9s %s\n",
1317 format_date(buf
, sizeof (buf
), uurp
->sec
),
1318 ari_list
->ari_uuid
, srp
->msgid
, srp
->severity
,
1319 srp
->injected
? dgettext("FMD", "injected") : "");
1320 ari_list
= ari_list
->next
;
1323 (void) printf("%-15s %-37s %-14s %-9s %s\n",
1324 format_date(buf
, sizeof (buf
), uurp
->sec
),
1325 uurp
->uuid
, srp
->msgid
, srp
->severity
,
1326 srp
->injected
? dgettext("FMD", "injected") : "");
1330 print_sup_record(srp
, opt_i
, full
);
1334 print_catalog(int summary
, int opt_a
, int full
, int opt_i
, int page_feed
)
1336 status_record_t
*srp
;
1339 slp
= status_rec_list
;
1342 srp
= slp
->status_record
;
1343 if (opt_a
|| srp
->not_suppressed
) {
1345 (void) printf("\f\n");
1346 print_status_record(srp
, summary
, opt_i
, full
);
1348 if (slp
->next
== status_rec_list
)
1355 static name_list_t
*
1356 find_fru(status_record_t
*srp
, char *resource
)
1358 name_list_t
*rt
= NULL
;
1359 name_list_t
*fru
= srp
->fru
;
1362 if (strcmp(resource
, fru
->name
) == 0) {
1367 if (fru
== srp
->fru
)
1374 print_fru_line(name_list_t
*fru
, char *uuid
)
1376 if (fru
->pct
== 100) {
1377 (void) printf("%s %d %s %d%%\n", uuid
, fru
->count
,
1378 dgettext("FMD", "suspects in this FRU total certainty"),
1381 (void) printf("%s %d %s %d%%\n", uuid
, fru
->count
,
1382 dgettext("FMD", "suspects in this FRU max certainty"),
1388 print_fru(int summary
, int opt_a
, int opt_i
, int page_feed
)
1390 resource_list_t
*tp
= status_fru_list
;
1391 status_record_t
*srp
;
1392 sr_list_t
*slp
, *end
;
1396 ari_list_t
*ari_list
;
1399 if (opt_a
|| tp
->not_suppressed
) {
1401 (void) printf("\f\n");
1403 (void) printf("-----------------------------"
1404 "---------------------------------------"
1406 slp
= tp
->status_rec_list
;
1409 srp
= slp
->status_record
;
1410 if (!srp
->not_suppressed
) {
1414 fru
= find_fru(srp
, tp
->resource
);
1417 (void) printf("\"%s\" (%s) ",
1418 fru
->label
, fru
->name
);
1420 (void) printf("%s ",
1425 } while (slp
!= end
);
1427 slp
= tp
->status_rec_list
;
1431 srp
= slp
->status_record
;
1432 if (!srp
->not_suppressed
) {
1438 if (strcmp(tp
->resource
,
1440 status
|= fru
->status
;
1442 if (fru
== srp
->fru
)
1446 } while (slp
!= end
);
1447 if (status
& FM_SUSPECT_NOT_PRESENT
)
1448 (void) printf(dgettext("FMD", "not present"));
1449 else if (status
& FM_SUSPECT_FAULTY
)
1450 (void) printf(dgettext("FMD", "faulty"));
1451 else if (status
& FM_SUSPECT_REPLACED
)
1452 (void) printf(dgettext("FMD", "replaced"));
1453 else if (status
& FM_SUSPECT_REPAIRED
)
1454 (void) printf(dgettext("FMD",
1455 "repair attempted"));
1456 else if (status
& FM_SUSPECT_ACQUITTED
)
1457 (void) printf(dgettext("FMD", "acquitted"));
1459 (void) printf(dgettext("FMD", "removed"));
1462 (void) printf(dgettext("FMD", " injected\n"));
1464 (void) printf(dgettext("FMD", "\n"));
1466 slp
= tp
->status_rec_list
;
1469 srp
= slp
->status_record
;
1470 if (!srp
->not_suppressed
) {
1475 fru
= find_fru(srp
, tp
->resource
);
1478 ari_list
= uurp
->ari_uuid_list
;
1481 ari_list
->ari_uuid
);
1486 print_fru_line(fru
, uurp
->uuid
);
1490 } while (slp
!= end
);
1492 slp
= tp
->status_rec_list
;
1495 srp
= slp
->status_record
;
1496 if (!srp
->not_suppressed
) {
1501 !serial_in_fru(srp
->fru
,
1503 print_name_list(srp
->serial
,
1510 } while (slp
!= end
);
1514 if (tp
== status_fru_list
)
/*
 * Print one status line for entries of status_asru_list.
 *
 * opt_a: when non-zero, entries are printed even if their not_suppressed
 *        flag is clear; otherwise only entries with not_suppressed set are
 *        printed.
 *
 * For a printed entry the status bits of the matching asru records are
 * OR-ed together (status |= asru->status) while walking the entry's
 * status_rec_list, and the result is mapped to a localized string.  The
 * line is the resource name left-justified in 69 columns followed by that
 * string, then either " injected" or a bare newline.
 *
 * NOTE(review): the loop headers and the switch expression are on lines
 * not shown in this excerpt; the walk appears to stop when tp comes back
 * to status_asru_list (i.e. a circular list) -- confirm.
 */
1520 print_asru(int opt_a
)
1522 resource_list_t
*tp
= status_asru_list
;
1523 status_record_t
*srp
;
1524 sr_list_t
*slp
, *end
;
1530 if (opt_a
|| tp
->not_suppressed
) {
1532 slp
= tp
->status_rec_list
;
1535 srp
= slp
->status_record
;
1536 if (!srp
->not_suppressed
) {
1542 if (strcmp(tp
->resource
,
1544 status
|= asru
->status
;
1546 if (asru
== srp
->asru
)
1550 } while (slp
!= end
);
/*
 * State-to-message mapping.  Note that FM_SUSPECT_FAULTY on its own is
 * reported as "degraded" and FM_SUSPECT_UNUSABLE on its own as "unknown";
 * only FAULTY together with UNUSABLE is reported as "faulted".
 */
1553 msg
= dgettext("FMD", "ok");
1555 case FM_SUSPECT_DEGRADED
:
1556 msg
= dgettext("FMD", "degraded");
1558 case FM_SUSPECT_FAULTY
| FM_SUSPECT_DEGRADED
:
1559 msg
= dgettext("FMD", "degraded");
1561 case FM_SUSPECT_FAULTY
:
1562 msg
= dgettext("FMD", "degraded");
1564 case FM_SUSPECT_UNUSABLE
:
1565 msg
= dgettext("FMD", "unknown");
1567 case FM_SUSPECT_FAULTY
| FM_SUSPECT_UNUSABLE
:
1568 msg
= dgettext("FMD", "faulted");
1574 (void) printf("%-69s %s", tp
->resource
, msg
);
1576 (void) printf(dgettext("FMD", " injected\n"));
1578 (void) printf(dgettext("FMD", "\n"));
1581 if (tp
== status_asru_list
)
/*
 * Search the selection list starting at uurecp for an entry whose uuid
 * string compares equal (strcmp) to uuid, advancing through ->next.
 *
 * The caller in dfault_rec() uses the result as a truth value.  The loop
 * header and the return statements are on lines not shown in this
 * excerpt.
 */
1587 uuid_in_list(char *uuid
, uurec_select_t
*uurecp
)
1590 if (strcmp(uuid
, uurecp
->uuid
) == 0)
1592 uurecp
= uurecp
->next
;
/*
 * Per-case callback handed to fmd_adm_case_iter() by get_cases_from_fmd().
 *
 * acp: case information; only acp->aci_event (an nvlist) is read here.
 * arg: optional uurec_select_t list of selected uuids, or NULL.
 *
 * The case is processed only if its event carries an
 * FM_SUSPECT_DIAG_TIME int64 array with at least two elements.  The uuid
 * is then looked up under FM_SUSPECT_UUID, and the event is added to the
 * catalog with the first diag-time element when no selection list was
 * supplied or the uuid is found in it.
 */
1598 dfault_rec(const fmd_adm_caseinfo_t
*acp
, void *arg
)
1604 uurec_select_t
*uurecp
= (uurec_select_t
*)arg
;
1606 if (nvlist_lookup_int64_array(acp
->aci_event
, FM_SUSPECT_DIAG_TIME
,
1607 &diag_time
, &nelem
) == 0 && nelem
>= 2) {
1608 (void) nvlist_lookup_string(acp
->aci_event
, FM_SUSPECT_UUID
,
/* A NULL selection list means "no filtering": every case is catalogued. */
1610 if (uurecp
== NULL
|| uuid_in_list(uuid
, uurecp
))
1611 add_fault_record_to_catalog(acp
->aci_event
, *diag_time
,
/*
 * Per-resource callback handed to fmd_adm_rsrc_iter() by
 * get_cases_from_fmd().  Forwards the resource's case identifier
 * (ari_case) and its uuid (ari_uuid) to update_asru_state_in_catalog().
 * The second argument is not used.
 */
1621 dstatus_rec(const fmd_adm_rsrcinfo_t
*ari
, void *unused
)
1623 update_asru_state_in_catalog(ari
->ari_case
, ari
->ari_uuid
);
/*
 * Populate the catalog from the fault manager daemon.
 *
 * adm:    open fmd administrative handle.
 * uurecp: optional uuid selection list, passed through to dfault_rec().
 * opt_i:  when non-zero, additionally iterate over resources with
 *         dstatus_rec().
 *
 * Either iteration failing is fatal: die() is called with a message
 * naming the step that failed.  rt starts as FMADM_EXIT_SUCCESS and is not
 * modified on any line visible in this excerpt.
 */
1628 get_cases_from_fmd(fmd_adm_t
*adm
, uurec_select_t
*uurecp
, int opt_i
)
1630 int rt
= FMADM_EXIT_SUCCESS
;
1633 * These calls may fail with Protocol error if message payload is
1636 if (fmd_adm_case_iter(adm
, NULL
, dfault_rec
, uurecp
) != 0)
1637 die("failed to get case list from fmd");
1638 if (opt_i
&& fmd_adm_rsrc_iter(adm
, 1, dstatus_rec
, NULL
) != 0)
1639 die("failed to get case status from fmd");
1644 * fmadm faulty command
1646 * -a show hidden fault records
1647 * -f show faulty FRUs
1648 * -g force grouping of similar faults on the same FRU
1649 * -n maximum number of fault records to display
1650 * -p pipe output through pager
1651 * -r show faulty ASRUs
1652 * -s print summary of first fault
1653 * -u print listed UUIDs only
/*
 * Entry point for the "faulty" subcommand.
 *
 * Visible flow:
 *   1. Parse options with getopt() using "afgin:prsu:v"; a usage error
 *      returns FMADM_EXIT_USAGE.
 *   2. Initialise the message handle (fmd_msg_init); failure returns
 *      FMADM_EXIT_ERROR.
 *   3. Load cases from fmd via get_cases_from_fmd().
 *   4. For paged output, run $PAGER (default "/usr/bin/more") through
 *      popen(), redirect file descriptor 1 to it and make stdout
 *      unbuffered.
 *   5. Print the FRU view and/or the catalog; the catalog is printed only
 *      when neither opt_f nor opt_r is set.
 *   6. Release the message handle and the topo handle, and close stdout.
 *
 * NOTE(review): the option switch and several conditions are on lines not
 * shown in this excerpt, so which option sets which flag is not documented
 * here.
 */
1658 cmd_faulty(fmd_adm_t
*adm
, int argc
, char *argv
[])
1660 int opt_a
= 0, opt_v
= 0, opt_p
= 0, opt_s
= 0, opt_r
= 0, opt_f
= 0;
1666 uurec_select_t
*uurecp
= NULL
;
1668 while ((c
= getopt(argc
, argv
, "afgin:prsu:v")) != EOF
) {
/*
 * NOTE(review): atoi() gives no error indication; a non-numeric argument
 * silently becomes 0.  Consider strtol() with validation.
 */
1683 max_fault
= atoi(optarg
);
/*
 * NOTE(review): no NULL check of this allocation is visible in the
 * excerpt -- confirm the result is tested before it is dereferenced.
 */
1695 tp
= (uurec_select_t
*)malloc(sizeof (uurec_select_t
));
1705 return (FMADM_EXIT_USAGE
);
1709 return (FMADM_EXIT_USAGE
);
1711 if ((fmadm_msghdl
= fmd_msg_init(NULL
, FMD_MSG_VERSION
)) == NULL
)
1712 return (FMADM_EXIT_ERROR
);
1713 rt
= get_cases_from_fmd(adm
, uurecp
, opt_i
);
/* Pager selection: honour $PAGER, falling back to /usr/bin/more. */
1715 if ((pager
= getenv("PAGER")) == NULL
)
1716 pager
= "/usr/bin/more";
1717 fp
= popen(pager
, "w");
1719 rt
= FMADM_EXIT_ERROR
;
/* From here on everything written to stdout goes to the pager. */
1722 (void) dup2(fileno(fp
), 1);
1723 setbuf(stdout
, NULL
);
1727 max_display
= max_fault
;
1729 print_fru(opt_s
, opt_a
, opt_i
, opt_p
&& !opt_s
);
1732 if (opt_f
== 0 && opt_r
== 0)
1733 print_catalog(opt_s
, opt_a
, opt_v
, opt_i
, opt_p
&& !opt_s
);
1734 fmd_msg_fini(fmadm_msghdl
);
1736 topo_close(topo_handle
);
1738 (void) fclose(stdout
);
/*
 * Entry point for the "flush" subcommand: flush the resource history of
 * each operand.
 *
 * The command takes no options.  FMADM_EXIT_USAGE is returned when there
 * is no operand (argc < 2) or when getopt(), called with an empty option
 * string, returns anything other than EOF.
 *
 * Each argv[i] is handed to fmd_adm_rsrc_flush().  A failure is reported
 * with warn() and recorded by setting status to FMADM_EXIT_ERROR; the loop
 * carries on with the remaining operands.  A note() line reports flushed
 * resources.
 */
1745 cmd_flush(fmd_adm_t
*adm
, int argc
, char *argv
[])
1747 int i
, status
= FMADM_EXIT_SUCCESS
;
1749 if (argc
< 2 || (i
= getopt(argc
, argv
, "")) != EOF
)
1750 return (FMADM_EXIT_USAGE
);
/*
 * NOTE(review): the loop starts at argv[1] rather than argv[optind],
 * unlike the other subcommands in this file, which index with optind.
 * A leading "--" would therefore be treated as a resource name -- confirm
 * whether this is intended.
 */
1752 for (i
= 1; i
< argc
; i
++) {
1753 if (fmd_adm_rsrc_flush(adm
, argv
[i
]) != 0) {
1754 warn("failed to flush %s", argv
[i
]);
1755 status
= FMADM_EXIT_ERROR
;
1757 note("flushed resource history for %s\n", argv
[i
]);
/*
 * Entry point for the "repair" subcommand.
 *
 * Takes no options and exactly one operand; anything else returns
 * FMADM_EXIT_USAGE.  The operand is first passed to fmd_adm_case_repair()
 * (treating it as a case uuid) and then to fmd_adm_rsrc_repaired()
 * (treating it as an fmri or label).  On failure die() reports the
 * operand; on success a note() is emitted and FMADM_EXIT_SUCCESS is
 * returned.
 *
 * NOTE(review): the conditions guarding the second call and the die()
 * call are on lines not shown in this excerpt.
 */
1764 cmd_repair(fmd_adm_t
*adm
, int argc
, char *argv
[])
1768 if (getopt(argc
, argv
, "") != EOF
)
1769 return (FMADM_EXIT_USAGE
);
1771 if (argc
- optind
!= 1)
1772 return (FMADM_EXIT_USAGE
);
1775 * argument could be a uuid, an fmri (asru, fru or resource)
1776 * or a label. Try uuid first, If that fails try the others.
1778 err
= fmd_adm_case_repair(adm
, argv
[optind
]);
1780 err
= fmd_adm_rsrc_repaired(adm
, argv
[optind
]);
1783 die("failed to record repair to %s", argv
[optind
]);
1785 note("recorded repair to %s\n", argv
[optind
]);
1786 return (FMADM_EXIT_SUCCESS
);
/*
 * Entry point for the "repaired" subcommand.
 *
 * Takes no options and exactly one operand (an fmri or a label); anything
 * else returns FMADM_EXIT_USAGE.  The operand is passed to
 * fmd_adm_rsrc_repaired().  On failure die() reports the operand; on
 * success a note() is emitted and FMADM_EXIT_SUCCESS is returned.
 *
 * The success message previously read "recorded repair to of %s"; it now
 * uses the same wording as the failure message ("repair to %s").
 */
1790 cmd_repaired(fmd_adm_t
*adm
, int argc
, char *argv
[])
1794 if (getopt(argc
, argv
, "") != EOF
)
1795 return (FMADM_EXIT_USAGE
);
1797 if (argc
- optind
!= 1)
1798 return (FMADM_EXIT_USAGE
);
1801 * argument could be an fmri (asru, fru or resource) or a label.
1803 err
= fmd_adm_rsrc_repaired(adm
, argv
[optind
]);
1805 die("failed to record repair to %s", argv
[optind
]);
1807 note("recorded repair to %s\n", argv
[optind
]);
1808 return (FMADM_EXIT_SUCCESS
);
/*
 * Entry point for the "replaced" subcommand.
 *
 * Takes no options and exactly one operand (an fmri or a label); anything
 * else returns FMADM_EXIT_USAGE.  The operand is passed to
 * fmd_adm_rsrc_replaced().  On failure die() reports the operand; on
 * success a note() is emitted and FMADM_EXIT_SUCCESS is returned.
 */
1812 cmd_replaced(fmd_adm_t
*adm
, int argc
, char *argv
[])
1816 if (getopt(argc
, argv
, "") != EOF
)
1817 return (FMADM_EXIT_USAGE
);
1819 if (argc
- optind
!= 1)
1820 return (FMADM_EXIT_USAGE
);
1823 * argument could be an fmri (asru, fru or resource) or a label.
1825 err
= fmd_adm_rsrc_replaced(adm
, argv
[optind
]);
1827 die("failed to record replacement of %s", argv
[optind
]);
1829 note("recorded replacement of %s\n", argv
[optind
]);
1830 return (FMADM_EXIT_SUCCESS
);
/*
 * Entry point for the "acquit" subcommand.
 *
 * Takes no options and one or two operands; anything else returns
 * FMADM_EXIT_USAGE.
 *
 * Two operands: fmd_adm_rsrc_acquit() is called with the operands in the
 * order given, and a second call with the operands swapped is also
 * present, so the uuid and the fmri/label can apparently be supplied in
 * either order.
 *
 * One operand: it is passed to fmd_adm_case_acquit() (treating it as a
 * case uuid) and to fmd_adm_rsrc_acquit() with an empty uuid string.
 *
 * On failure die() reports the first operand; on success a note() is
 * emitted and FMADM_EXIT_SUCCESS is returned.
 *
 * NOTE(review): the conditions guarding the fallback calls and the die()
 * call are on lines not shown in this excerpt.
 */
1834 cmd_acquit(fmd_adm_t
*adm
, int argc
, char *argv
[])
1838 if (getopt(argc
, argv
, "") != EOF
)
1839 return (FMADM_EXIT_USAGE
);
1841 if (argc
- optind
!= 1 && argc
- optind
!= 2)
1842 return (FMADM_EXIT_USAGE
);
1845 * argument could be a uuid, an fmri (asru, fru or resource)
1846 * or a label. Or it could be a uuid and an fmri or label.
1848 if (argc
- optind
== 2) {
1849 err
= fmd_adm_rsrc_acquit(adm
, argv
[optind
], argv
[optind
+ 1]);
1851 err
= fmd_adm_rsrc_acquit(adm
, argv
[optind
+ 1],
1854 err
= fmd_adm_case_acquit(adm
, argv
[optind
]);
1856 err
= fmd_adm_rsrc_acquit(adm
, argv
[optind
], "");
/*
 * NOTE(review): "acquital" in the two messages below is a misspelling of
 * "acquittal".  Left unchanged here because it is user-visible output;
 * confirm nothing matches on the text before correcting it.
 */
1860 die("failed to record acquital of %s", argv
[optind
]);
1862 note("recorded acquital of %s\n", argv
[optind
]);
1863 return (FMADM_EXIT_SUCCESS
);