/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"
/*
 * FMD Log File Subsystem
 *
 * Events are written to one of two log files as they are received or created;
 * the error log tracks all ereport.* events received on the inbound event
 * transport, and the fault log tracks all list.* events generated by fmd or
 * its client modules.  In addition, we use the same log file format to cache
 * state and events associated with ASRUs that are named in a diagnosis.
 *
 * The log files use the exacct format manipulated by libexacct(3LIB) and
 * originally defined in PSARC 1999/119.  However, the exacct library was
 * designed primarily for read-only clients and without the synchronous i/o
 * considerations and seeking required for fmd, so we use libexacct here only
 * to read and write the file headers and to pack data from memory into a file
 * bytestream.  All of the i/o and file offset manipulations are performed by
 * the fmd code below.  Our exacct file management uses the following grammar:
 *
 * file := hdr toc event*
 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER
 *     EXD_FMA_PLAT EXD_FMA_UUID
 * toc := EXD_FMA_OFFSET
 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref* or legacy evref
 * evref := EXD_FMA_UUID EXD_FMA_OFFSET
 * legacy evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET
 *
 * (An illustrative reader sketch follows the CAT_FMA_* definitions below.)
 *
 * Any event can be uniquely identified by the tuple (file, offset) where file
 * is encoded as (uuid) when we are cross-linking files.  For legacy file
 * formats we still support encoding the reference as (major, minor, inode).
 * Note that we break out the file's dev_t into its two 32-bit components to
 * permit development of either 32-bit or 64-bit log readers and writers; the
 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way
 * for a 32-bit application to retrieve and store a 64-bit dev_t.
 *
 * In order to replay events in the event of an fmd crash, events are initially
 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the
 * fmd_log_append() function.  Later, once an event transitions from the
 * received state to one of its other states (see fmd_event.c for details),
 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating
 * that the event is fully processed and no longer needs to be replayed.
 */
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/statvfs.h>
#include <sys/fm/protocol.h>
#include <sys/exacct_impl.h>
#include <uuid/uuid.h>

#include <unistd.h>
#include <stdio.h>
#include <limits.h>
#include <strings.h>
#include <fcntl.h>
#include <errno.h>

#include <fmd_alloc.h>
#include <fmd_error.h>
#include <fmd_string.h>
#include <fmd_event.h>
#include <fmd_conf.h>
#include <fmd_subr.h>
#include <fmd_case.h>
#include <fmd_log.h>

#include <fmd.h>
#define	CAT_FMA_RGROUP	(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_RFMA)
#define	CAT_FMA_GROUP	(EXT_GROUP | EXC_DEFAULT | EXD_GROUP_FMA)

#define	CAT_FMA_LABEL	(EXT_STRING | EXC_DEFAULT | EXD_FMA_LABEL)
#define	CAT_FMA_VERSION	(EXT_STRING | EXC_DEFAULT | EXD_FMA_VERSION)
#define	CAT_FMA_OSREL	(EXT_STRING | EXC_DEFAULT | EXD_FMA_OSREL)
#define	CAT_FMA_OSVER	(EXT_STRING | EXC_DEFAULT | EXD_FMA_OSVER)
#define	CAT_FMA_PLAT	(EXT_STRING | EXC_DEFAULT | EXD_FMA_PLAT)
#define	CAT_FMA_UUID	(EXT_STRING | EXC_DEFAULT | EXD_FMA_UUID)
#define	CAT_FMA_TODSEC	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODSEC)
#define	CAT_FMA_TODNSEC	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_TODNSEC)
#define	CAT_FMA_NVLIST	(EXT_RAW | EXC_DEFAULT | EXD_FMA_NVLIST)
#define	CAT_FMA_MAJOR	(EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MAJOR)
#define	CAT_FMA_MINOR	(EXT_UINT32 | EXC_DEFAULT | EXD_FMA_MINOR)
#define	CAT_FMA_INODE	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_INODE)
#define	CAT_FMA_OFFSET	(EXT_UINT64 | EXC_DEFAULT | EXD_FMA_OFFSET)
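
/*
 * Illustrative sketch, not part of fmd: one way an external reader could walk
 * the hdr/toc/event grammar described in the block comment above using only
 * libexacct(3LIB) and the CAT_FMA_* tags.  The EO_NO_VALID_HDR flag (to skip
 * creator validation) and the exact ea_open() flag usage are assumptions; a
 * real reader would also unpack the CAT_FMA_NVLIST payload with libnvpair.
 */
#ifdef FMD_LOG_READER_EXAMPLE
static int
fmd_log_example_count(const char *path)
{
	ea_file_t ef;
	ea_object_t *grp, *obj;
	int nevents = 0;

	if (ea_open(&ef, path, NULL, EO_NO_VALID_HDR | EO_HEAD,
	    O_RDONLY, 0) != 0)
		return (-1); /* ea_error() describes the failure */

	/*
	 * The hdr and toc groups contain no CAT_FMA_NVLIST item, so counting
	 * groups that do yields the number of event records in the file.
	 */
	while ((grp = ea_get_object_tree(&ef, 1)) != NULL) {
		for (obj = grp->eo_group.eg_objs; obj != NULL;
		    obj = obj->eo_next) {
			if (obj->eo_catalog == CAT_FMA_NVLIST) {
				nevents++;
				break;
			}
		}
		ea_free_object(grp, EUP_ALLOC);
	}

	(void) ea_close(&ef);
	return (nevents);
}
#endif	/* FMD_LOG_READER_EXAMPLE */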
static ssize_t
fmd_log_write(fmd_log_t *lp, const void *buf, size_t n)
{
	ssize_t resid = n;
	ssize_t len;

	ASSERT(MUTEX_HELD(&lp->log_lock));

	while (resid != 0) {
		if ((len = write(lp->log_fd, buf, resid)) <= 0)
			break;

		resid -= len;
		buf = (char *)buf + len;
	}

	if (resid == n && n != 0)
		return (-1);

	return (n - resid);
}
static int
fmd_log_write_hdr(fmd_log_t *lp, const char *tag)
{
	ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5, i6;
	const char *osrel, *osver, *plat;
	off64_t off = 0;
	int err = 0;
	uuid_t uuid;

	(void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel);
	(void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver);
	(void) fmd_conf_getprop(fmd.d_conf, "platform", &plat);
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &lp->log_uuidlen);

	lp->log_uuid = fmd_zalloc(lp->log_uuidlen + 1, FMD_SLEEP);
	uuid_generate(uuid);
	uuid_unparse(uuid, lp->log_uuid);

	err |= ea_set_group(&hdr, CAT_FMA_GROUP);
	err |= ea_set_group(&toc, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0);
	err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0);
	err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0);
	err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0);
	err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0);
	err |= ea_set_item(&i5, CAT_FMA_UUID, lp->log_uuid, 0);
	err |= ea_set_item(&i6, CAT_FMA_OFFSET, &off, 0);

	if (err == 0) {
		(void) ea_attach_to_group(&hdr, &i0);
		(void) ea_attach_to_group(&hdr, &i1);
		(void) ea_attach_to_group(&hdr, &i2);
		(void) ea_attach_to_group(&hdr, &i3);
		(void) ea_attach_to_group(&hdr, &i4);
		(void) ea_attach_to_group(&hdr, &i5);
		(void) ea_attach_to_group(&toc, &i6);

		size_t hdr_size = ea_pack_object(&hdr, NULL, 0);
		size_t toc_size = ea_pack_object(&toc, NULL, 0);

		size_t size = hdr_size + toc_size;
		void *buf = fmd_alloc(size, FMD_SLEEP);

		(void) ea_pack_object(&hdr, buf, hdr_size);
		(void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size);

		if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L)
			fmd_panic("failed to seek log %s", lp->log_name);

		if (fmd_log_write(lp, buf, size) != size)
			err = errno; /* save errno for fmd_set_errno() below */

		fmd_free(buf, size);

		lp->log_toc = lp->log_off + hdr_size;
		lp->log_beg = lp->log_off + hdr_size + toc_size;
		lp->log_off = lp->log_off + hdr_size + toc_size;

		if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END))
			fmd_panic("eof off != log_off 0x%llx\n", lp->log_off);
	} else
		err = EFMD_LOG_EXACCT;

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);
	(void) ea_free_item(&i3, EUP_ALLOC);
	(void) ea_free_item(&i4, EUP_ALLOC);
	(void) ea_free_item(&i5, EUP_ALLOC);
	(void) ea_free_item(&i6, EUP_ALLOC);

	return (err ? fmd_set_errno(err) : 0);
}
static int
fmd_log_check_err(fmd_log_t *lp, int err, const char *msg)
{
	int eaerr = ea_error();
	char buf[BUFSIZ];

	(void) snprintf(buf, sizeof (buf), "%s: %s: %s\n",
	    lp->log_name, msg, eaerr != EXR_OK ?
	    fmd_ea_strerror(eaerr) : "catalog tag mismatch");

	fmd_error(err, buf);
	return (fmd_set_errno(err));
}
static int
fmd_log_check_hdr(fmd_log_t *lp, const char *tag)
{
	int got_version = 0, got_label = 0;
	ea_object_t *grp, *obj;
	off64_t hdr_off, hdr_size;
	int dvers, fvers;
	const char *p;

	ea_clear(&lp->log_ea); /* resync exacct file */

	if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L)
		fmd_panic("failed to seek log %s", lp->log_name);

	/*
	 * Read the first group of log meta-data: the write-once read-only
	 * file header.  We read all records in this group, ignoring all but
	 * the VERSION and LABEL, which are required and must be verified.
	 */
	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
	    grp->eo_catalog != CAT_FMA_GROUP) {
		ea_free_object(grp, EUP_ALLOC);
		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
		    "invalid fma hdr record group"));
	}

	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
		switch (obj->eo_catalog) {
		case CAT_FMA_VERSION:
			for (dvers = 0, p = fmd.d_version;
			    *p >= '0' && *p <= '9'; p++)
				dvers = dvers * 10 + (*p - '0');

			for (fvers = 0, p = obj->eo_item.ei_string;
			    *p >= '0' && *p <= '9'; p++)
				fvers = fvers * 10 + (*p - '0');

			if (fvers > dvers) {
				fmd_error(EFMD_LOG_INVAL, "%s: log version "
				    "%s is not supported by this daemon\n",
				    lp->log_name, obj->eo_item.ei_string);
				ea_free_object(grp, EUP_ALLOC);
				return (fmd_set_errno(EFMD_LOG_VERSION));
			}

			got_version++;
			break;

		case CAT_FMA_LABEL:
			if (strcmp(obj->eo_item.ei_string, tag) != 0) {
				fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' "
				    "does not match expected tag '%s'\n",
				    lp->log_name, obj->eo_item.ei_string, tag);
				ea_free_object(grp, EUP_ALLOC);
				return (fmd_set_errno(EFMD_LOG_INVAL));
			}

			got_label++;
			break;

		case CAT_FMA_UUID:
			lp->log_uuid = fmd_strdup(obj->eo_item.ei_string,
			    FMD_SLEEP);
			lp->log_uuidlen = strlen(lp->log_uuid);
			break;
		}
	}

	hdr_size = ea_pack_object(grp, NULL, 0);
	ea_free_object(grp, EUP_ALLOC);

	if (!got_version || !got_label) {
		fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not "
		    "include mandatory version and/or label\n", lp->log_name);
		return (fmd_set_errno(EFMD_LOG_INVAL));
	}

	/*
	 * Read the second group of log meta-data: the table of contents.  We
	 * expect this group to contain an OFFSET object indicating the current
	 * value of log_skip.  We save this in our fmd_log_t and then return.
	 */
	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
	    grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 ||
	    grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) {
		ea_free_object(grp, EUP_ALLOC);
		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
		    "invalid fma toc record group"));
	}

	lp->log_toc = hdr_off + hdr_size;
	lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0);
	lp->log_off = lseek64(lp->log_fd, 0, SEEK_END);
	lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64;

	if (lp->log_skip > lp->log_off) {
		fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; "
		    "resetting to zero\n", lp->log_name, lp->log_skip);
		lp->log_skip = 0;
	}

	ea_free_object(grp, EUP_ALLOC);
	return (0);
}
static int
fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags)
{
	int fd = dup(lp->log_fd);
	const char *creator;

	(void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator);

	if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) {
		fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n",
		    lp->log_name, fmd_ea_strerror(ea_error()));
		(void) close(fd);
		return (fmd_set_errno(EFMD_LOG_EXACCT));
	}

	lp->log_flags |= FMD_LF_EAOPEN;
	return (0);
}
static fmd_log_t *
fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags)
{
	fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP);

	const char *slash = "/";
	char buf[PATH_MAX];
	size_t len;
	int err;

	(void) pthread_mutex_init(&lp->log_lock, NULL);
	(void) pthread_cond_init(&lp->log_cv, NULL);
	(void) pthread_mutex_lock(&lp->log_lock);

	if (strcmp(root, "") == 0)
		slash = "";
	len = strlen(root) + strlen(name) + strlen(slash) + 1; /* for "\0" */
	lp->log_name = fmd_alloc(len, FMD_SLEEP);
	(void) snprintf(lp->log_name, len, "%s%s%s", root, slash, name);
	lp->log_tag = fmd_strdup(tag, FMD_SLEEP);
	(void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree);

	if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0)
		lp->log_flags |= FMD_LF_REPLAY;

	if (strcmp(lp->log_tag, FMD_LOG_XPRT) == 0)
		lp->log_flags |= FMD_LF_REPLAY;

top:
	if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 ||
	    fstat64(lp->log_fd, &lp->log_stat) == -1) {
		fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name);
		fmd_log_close(lp);
		return (NULL);
	}

	/*
	 * If our open() created the log file, use libexacct to write a header
	 * and position the file just after the header (EO_TAIL).  If the log
	 * file already existed, use libexacct to validate the header and again
	 * position the file just after the header (EO_HEAD).  Note that we lie
	 * to libexacct about 'oflags' in order to achieve the desired result.
	 */
	if (lp->log_stat.st_size == 0) {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL,
		    O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag);
	} else {
		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD,
		    O_RDONLY) || fmd_log_check_hdr(lp, tag);
	}

	/*
	 * If ea_fdopen() failed and the log was pre-existing, attempt to move
	 * it aside and start a new one.  If we created the log but failed to
	 * initialize it, then we have no choice but to give up (e.g. EROFS).
	 */
	if (err) {
		fmd_error(EFMD_LOG_OPEN,
		    "failed to initialize log %s", lp->log_name);

		if (lp->log_flags & FMD_LF_EAOPEN) {
			lp->log_flags &= ~FMD_LF_EAOPEN;
			(void) ea_close(&lp->log_ea);
		}

		(void) close(lp->log_fd);
		lp->log_fd = -1;

		if (lp->log_stat.st_size != 0 && snprintf(buf,
		    sizeof (buf), "%s-", lp->log_name) < PATH_MAX &&
		    rename(lp->log_name, buf) == 0) {
			TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf));
			if (oflags & O_CREAT)
				goto top;
		}

		fmd_log_close(lp);
		return (NULL);
	}

	lp->log_refs++;
	(void) pthread_mutex_unlock(&lp->log_lock);

	return (lp);
}
fmd_log_t *
fmd_log_tryopen(const char *root, const char *name, const char *tag)
{
	return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC));
}

fmd_log_t *
fmd_log_open(const char *root, const char *name, const char *tag)
{
	return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC));
}
void
fmd_log_close(fmd_log_t *lp)
{
	ASSERT(MUTEX_HELD(&lp->log_lock));
	ASSERT(lp->log_refs == 0);

	if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) {
		fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n",
		    lp->log_name, fmd_ea_strerror(ea_error()));
	}

	if (lp->log_fd >= 0 && close(lp->log_fd) != 0) {
		fmd_error(EFMD_LOG_CLOSE,
		    "failed to close log %s", lp->log_name);
	}

	fmd_strfree(lp->log_name);
	fmd_strfree(lp->log_tag);
	if (lp->log_uuid != NULL)
		fmd_free(lp->log_uuid, lp->log_uuidlen + 1);

	fmd_free(lp, sizeof (fmd_log_t));
}
void
fmd_log_hold_pending(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);

	lp->log_refs++;
	ASSERT(lp->log_refs != 0);

	if (lp->log_flags & FMD_LF_REPLAY) {
		lp->log_pending++;
		ASSERT(lp->log_pending != 0);
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
}
void
fmd_log_hold(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);
	lp->log_refs++;
	ASSERT(lp->log_refs != 0);
	(void) pthread_mutex_unlock(&lp->log_lock);
}
void
fmd_log_rele(fmd_log_t *lp)
{
	(void) pthread_mutex_lock(&lp->log_lock);
	ASSERT(lp->log_refs != 0);

	if (--lp->log_refs == 0)
		fmd_log_close(lp);
	else
		(void) pthread_mutex_unlock(&lp->log_lock);
}
void
fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	int err = 0;

	ea_object_t grp0, grp1, i0, i1, i2, *items;
	ea_object_t **fe = NULL;
	size_t nvsize, easize, itsize, frsize;
	char *nvbuf, *eabuf;
	statvfs64_t stv;

	(void) pthread_mutex_lock(&ep->ev_lock);

	ASSERT(ep->ev_flags & FMD_EVF_VOLATILE);
	ASSERT(ep->ev_log == NULL);

	(void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR);
	nvbuf = fmd_alloc(nvsize, FMD_SLEEP);
	(void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);

	if (lp->log_flags & FMD_LF_REPLAY)
		err |= ea_set_group(&grp0, CAT_FMA_RGROUP);
	else
		err |= ea_set_group(&grp0, CAT_FMA_GROUP);

	err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0);
	err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0);
	err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize);

	if (err != 0) {
		(void) pthread_mutex_unlock(&ep->ev_lock);
		err = EFMD_LOG_EXACCT;
		goto exerr;
	}

	(void) ea_attach_to_group(&grp0, &i0);
	(void) ea_attach_to_group(&grp0, &i1);
	(void) ea_attach_to_group(&grp0, &i2);

	/*
	 * If this event has a case associated with it (i.e. it is a list),
	 * then allocate a block of ea_object_t's and fill in a group for
	 * each event saved in the case's item list.  For each such group,
	 * we attach it to grp1, which in turn will be attached to grp0.
	 */
	if (cp != NULL) {
		ea_object_t *egrp, *ip, **fp;
		fmd_event_impl_t *eip;
		fmd_case_item_t *cit;

		(void) ea_set_group(&grp1, CAT_FMA_GROUP);
		frsize = sizeof (ea_object_t *) * cip->ci_nitems;
		itsize = sizeof (ea_object_t) * cip->ci_nitems * 5;
		items = ip = fmd_alloc(itsize, FMD_SLEEP);

		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
			major_t maj;
			minor_t min;

			eip = (fmd_event_impl_t *)cit->cit_event;

			if (eip->ev_log == NULL)
				continue; /* event was never logged */

			maj = major(eip->ev_log->log_stat.st_dev);
			min = minor(eip->ev_log->log_stat.st_dev);

			(void) ea_set_group(ip, CAT_FMA_GROUP);
			egrp = ip++; /* first obj is group */

			/*
			 * If the event log file is in legacy format,
			 * then write the xref to the file in the legacy
			 * maj/min/inode method; else write it using the
			 * log file's uuid.
			 */
			if (eip->ev_log->log_uuid == NULL) {
				(void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0);
				(void) ea_attach_to_group(egrp, ip++);
				(void) ea_set_item(ip, CAT_FMA_INODE,
				    &eip->ev_log->log_stat.st_ino, 0);
				(void) ea_attach_to_group(egrp, ip++);
			} else {
				if (ea_set_item(ip, CAT_FMA_UUID,
				    eip->ev_log->log_uuid, 0) == -1) {
					(void) pthread_mutex_unlock(
					    &ep->ev_lock);
					err = EFMD_LOG_EXACCT;
					goto exerrcp;
				}

				/*
				 * Remember the uuid items so the string data
				 * libexacct allocated for them can be freed
				 * at exerrcp below.
				 */
				if (fe == NULL)
					fe = fp = fmd_zalloc(frsize, FMD_SLEEP);
				*fp++ = ip;
				(void) ea_attach_to_group(egrp, ip++);
			}

			(void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0);
			(void) ea_attach_to_group(egrp, ip++);
			(void) ea_attach_to_group(&grp1, egrp);
		}

		(void) ea_attach_to_group(&grp0, &grp1);
	}

	easize = ea_pack_object(&grp0, NULL, 0);
	eabuf = fmd_alloc(easize, FMD_SLEEP);
	(void) ea_pack_object(&grp0, eabuf, easize);

	/*
	 * Before writing the record, check to see if this would cause the free
	 * space in the filesystem to drop below our minfree threshold.  If so,
	 * don't bother attempting the write and instead pretend it failed.  As
	 * fmd(1M) runs as root, it will be able to access the space "reserved"
	 * for root, and therefore can run the system out of disk space in a
	 * heavy error load situation, violating the basic design principle of
	 * fmd(1M) that we don't want to make a bad situation even worse.
	 */
	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 &&
	    stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) {

		TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag));
		err = EFMD_LOG_MINFREE;

	} else if (fmd_log_write(lp, eabuf, easize) == easize) {
		TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx",
		    lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off));

		ep->ev_flags &= ~FMD_EVF_VOLATILE;
		ep->ev_log = lp;
		ep->ev_off = lp->log_off;
		ep->ev_len = easize;

		if (lp->log_flags & FMD_LF_REPLAY) {
			lp->log_pending++;
			ASSERT(lp->log_pending != 0);
		}

		lp->log_refs++;
		ASSERT(lp->log_refs != 0);
		lp->log_off += easize;
	} else {
		err = errno; /* save errno for fmd_error() call below */

		/*
		 * If we can't append the record, seek the file back to the
		 * original location and truncate it there in order to make
		 * sure the file is always in a sane state w.r.t. libexacct.
		 */
		(void) lseek64(lp->log_fd, lp->log_off, SEEK_SET);
		(void) ftruncate64(lp->log_fd, lp->log_off);
	}

	(void) pthread_mutex_unlock(&lp->log_lock);
	(void) pthread_mutex_unlock(&ep->ev_lock);

	fmd_free(eabuf, easize);

exerrcp:
	if (cp != NULL) {
		if (fe != NULL) {
			ea_object_t **fp = fe;
			int i = 0;

			for (; *fp != NULL && i < cip->ci_nitems; i++)
				(void) ea_free_item(*fp++, EUP_ALLOC);
			fmd_free(fe, frsize);
		}

		fmd_free(items, itsize);
	}

exerr:
	fmd_free(nvbuf, nvsize);

	(void) ea_free_item(&i0, EUP_ALLOC);
	(void) ea_free_item(&i1, EUP_ALLOC);
	(void) ea_free_item(&i2, EUP_ALLOC);

	/*
	 * Keep track of out-of-space errors using global statistics.  As we're
	 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged.
	 */
	if (err == ENOSPC || err == EFMD_LOG_MINFREE) {
		fmd_stat_t *sp;

		if (lp == fmd.d_errlog)
			sp = &fmd.d_stats->ds_err_enospc;
		else if (lp == fmd.d_fltlog)
			sp = &fmd.d_stats->ds_flt_enospc;
		else
			sp = &fmd.d_stats->ds_oth_enospc;

		(void) pthread_mutex_lock(&fmd.d_stats_lock);
		sp->fmds_value.ui64++;
		(void) pthread_mutex_unlock(&fmd.d_stats_lock);
	}

	if (err != 0) {
		fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n",
		    lp->log_tag, (void *)ep, fmd_strerror(err));
	}
}
/*
 * Commit an event to the log permanently, indicating that it should not be
 * replayed on restart.  This is done by overwriting the event group's catalog
 * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()).  We
 * use pwrite64() to update the existing word directly, using somewhat guilty
 * knowledge that exacct stores the 32-bit catalog word first for each object.
 * Since we are overwriting an existing log location using pwrite64() and hold
 * the event lock, we do not need to hold the log_lock during the i/o.
 */
void
fmd_log_commit(fmd_log_t *lp, fmd_event_t *e)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
	ea_catalog_t c;
	int err = 0;

	if (!(lp->log_flags & FMD_LF_REPLAY))
		return; /* log does not require replay tagging */

	ASSERT(MUTEX_HELD(&ep->ev_lock));
	ASSERT(ep->ev_log == lp && ep->ev_off != 0);

	c = CAT_FMA_GROUP;
	exacct_order32(&c);	/* convert catalog word to file byte order */

	if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) {
		TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep));
		ep->ev_flags &= ~FMD_EVF_REPLAY;

		/*
		 * If we have committed the event, check to see if the TOC skip
		 * offset needs to be updated, and decrement the pending count.
		 */
		(void) pthread_mutex_lock(&lp->log_lock);

		if (lp->log_skip == ep->ev_off) {
			lp->log_flags |= FMD_LF_DIRTY;
			lp->log_skip += ep->ev_len;
		}

		ASSERT(lp->log_pending != 0);
		lp->log_pending--;

		(void) pthread_cond_broadcast(&lp->log_cv);
		(void) pthread_mutex_unlock(&lp->log_lock);

	} else {
		err = errno;
		fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n",
		    lp->log_tag, (void *)ep, fmd_strerror(err));
	}
}
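
/*
 * Illustrative sketch, not part of fmd: the append/commit lifecycle described
 * in the comment above fmd_log_commit(), as a caller might exercise it.  The
 * event is first appended with the replayable RFMA tag; once the owner has
 * fully processed it, the catalog word is rewritten so the event is not
 * replayed after a crash.  Taking ev_lock here mirrors the ASSERT in
 * fmd_log_commit(); the helper name is hypothetical.
 */
#ifdef FMD_LOG_COMMIT_EXAMPLE
static void
fmd_log_commit_example(fmd_log_t *lp, fmd_event_t *e)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;

	fmd_log_append(lp, e, NULL);	/* written with CAT_FMA_RGROUP */

	/* ... the event is dispatched and fully processed here ... */

	(void) pthread_mutex_lock(&ep->ev_lock);
	fmd_log_commit(lp, e);		/* tag rewritten to CAT_FMA_GROUP */
	(void) pthread_mutex_unlock(&ep->ev_lock);
}
#endif	/* FMD_LOG_COMMIT_EXAMPLE */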
/*
 * If we need to destroy an event and it wasn't able to be committed, we permit
 * the owner to decommit from ever trying again.  This operation decrements the
 * pending count on the log and broadcasts to anyone waiting on log_cv.
 */
void
fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e)
{
	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;

	if (!(lp->log_flags & FMD_LF_REPLAY))
		return; /* log does not require replay tagging */

	ASSERT(MUTEX_HELD(&ep->ev_lock));
	ASSERT(ep->ev_log == lp);

	(void) pthread_mutex_lock(&lp->log_lock);

	TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep));
	ep->ev_flags &= ~FMD_EVF_REPLAY;

	ASSERT(lp->log_pending != 0);
	lp->log_pending--;

	(void) pthread_cond_broadcast(&lp->log_cv);
	(void) pthread_mutex_unlock(&lp->log_lock);
}
static fmd_event_t *
fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off)
{
	fmd_timeval_t ftv = { -1ULL, -1ULL };
	nvlist_t *nvl = NULL;

	ea_object_t *obj;
	char *class;
	int err;

	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
		switch (obj->eo_catalog) {
		case CAT_FMA_NVLIST:
			if ((err = nvlist_xunpack(obj->eo_item.ei_raw,
			    obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) {
				fmd_error(EFMD_LOG_UNPACK, "failed to unpack "
				    "log nvpair: %s\n", fmd_strerror(err));
				return (NULL);
			}
			break;

		case CAT_FMA_TODSEC:
			ftv.ftv_sec = obj->eo_item.ei_uint64;
			break;

		case CAT_FMA_TODNSEC:
			ftv.ftv_nsec = obj->eo_item.ei_uint64;
			break;
		}
	}

	if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) {
		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
		    "required object(s) missing from record group\n");
		nvlist_free(nvl);
		return (NULL);
	}

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
		    "record is missing required '%s' nvpair\n", FM_CLASS);
		nvlist_free(nvl);
		return (NULL);
	}

	return (fmd_event_recreate(FMD_EVT_PROTOCOL,
	    &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0)));
}
/*
 * Replay event(s) from the specified log by invoking the specified callback
 * function 'func' for each event.  If the log has the FMD_LF_REPLAY flag set,
 * we replay all events after log_skip that have the FMA_RGROUP group tag.
 * This mode is used for the error telemetry log.  If the log does not have
 * this flag set (used for ASRU logs), only the most recent event is replayed.
 */
void
fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data)
{
	ea_object_t obj, *grp;
	ea_object_type_t type;
	ea_catalog_t c;
	fmd_event_t *ep;
	off64_t off, skp;
	uint_t n = 0;

	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* we just created this log: never replay events */
	}

	while (lp->log_flags & FMD_LF_BUSY)
		(void) pthread_cond_wait(&lp->log_cv, &lp->log_lock);

	if (lp->log_off == lp->log_beg) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		return; /* no records appended yet */
	}

	lp->log_flags |= FMD_LF_BUSY;
	skp = lp->log_skip;

	ea_clear(&lp->log_ea); /* resync exacct file */

	/*
	 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it
	 * is non-zero) or at log_beg.  Otherwise replay from the end (log_off)
	 * after backing up to the start of the most recently appended record.
	 */
	if (lp->log_flags & FMD_LF_REPLAY) {
		off = MAX(lp->log_beg, lp->log_skip);
		c = CAT_FMA_RGROUP;
	} else {
		off = lp->log_off;
		c = CAT_FMA_GROUP;
	}

	if (lseek64(lp->log_fd, off, SEEK_SET) != off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)off);
	}

	/*
	 * If FMD_LF_REPLAY is not set, back up to the start of the previous
	 * object and make sure this object is an EO_GROUP; otherwise return.
	 */
	if (!(lp->log_flags & FMD_LF_REPLAY) &&
	    (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) {
		fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected "
		    "type %d (log may be truncated or corrupt)\n", type);
		goto done;
	}

	while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) {
		if (!(lp->log_flags & FMD_LF_REPLAY))
			off -= ea_pack_object(grp, NULL, 0);
		else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP)
			skp = off; /* update skip */

		/*
		 * We temporarily drop log_lock around the call to unpack the
		 * event, hold it, and perform the callback, because these
		 * operations may try to acquire log_lock to bump log_refs.
		 * We cannot lose control because the FMD_LF_BUSY flag is set.
		 */
		(void) pthread_mutex_unlock(&lp->log_lock);

		if (grp->eo_catalog == c &&
		    (ep = fmd_log_unpack(lp, grp, off)) != NULL) {

			TRACE((FMD_DBG_LOG, "replay %s %p off %llx",
			    lp->log_tag, (void *)ep, (u_longlong_t)off));

			fmd_event_hold(ep);
			func(lp, ep, data);
			fmd_event_rele(ep);
			n++;
		}

		(void) pthread_mutex_lock(&lp->log_lock);
		off += ea_pack_object(grp, NULL, 0);
		ea_free_object(grp, EUP_ALLOC);
	}

	if (ea_error() != EXR_EOF) {
		fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at "
		    "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off,
		    fmd_ea_strerror(ea_error()));
	}

	if (n == 0)
		skp = off; /* if no replays, move skip to where we ended up */

	if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) {
		fmd_panic("failed to seek %s to 0x%llx\n",
		    lp->log_name, (u_longlong_t)lp->log_off);
	}

	if (skp != lp->log_skip) {
		lp->log_flags |= FMD_LF_DIRTY;
		lp->log_skip = skp;
	}

done:
	lp->log_flags &= ~FMD_LF_BUSY;
	(void) pthread_cond_broadcast(&lp->log_cv);
	(void) pthread_mutex_unlock(&lp->log_lock);
}
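
/*
 * Illustrative sketch, not part of fmd: a minimal replay callback of the form
 * fmd_log_replay() invokes above (func(lp, ep, data)).  A real consumer would
 * re-dispatch or otherwise process each replayed event; this one only counts
 * them into a caller-supplied counter.  The signature shown is inferred from
 * the call site above; the actual fmd_log_f typedef lives in fmd_log.h.
 */
#ifdef FMD_LOG_REPLAY_EXAMPLE
static void
fmd_log_replay_count(fmd_log_t *lp, fmd_event_t *ep, void *data)
{
	uint_t *np = data;

	TRACE((FMD_DBG_LOG, "replayed %s event %p", lp->log_tag, (void *)ep));
	(*np)++;
}

/*
 * Usage: uint_t n = 0; fmd_log_replay(lp, fmd_log_replay_count, &n);
 */
#endif	/* FMD_LOG_REPLAY_EXAMPLE */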
void
fmd_log_update(fmd_log_t *lp)
{
	ea_object_t toc, item;
	off64_t skip = 0;
	size_t size;
	void *buf;

	(void) pthread_mutex_lock(&lp->log_lock);

	if (lp->log_flags & FMD_LF_DIRTY) {
		lp->log_flags &= ~FMD_LF_DIRTY;
		skip = lp->log_skip;
	}

	(void) pthread_mutex_unlock(&lp->log_lock);

	/*
	 * If the skip needs to be updated, construct a TOC record group
	 * containing the skip offset and overwrite the TOC in-place.
	 */
	if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 &&
	    ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) {

		(void) ea_attach_to_group(&toc, &item);
		size = ea_pack_object(&toc, NULL, 0);
		buf = fmd_alloc(size, FMD_SLEEP);

		(void) ea_pack_object(&toc, buf, size);
		ASSERT(lp->log_toc + size == lp->log_beg);

		if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) {
			TRACE((FMD_DBG_LOG, "updated skip to %llx", skip));
		} else {
			fmd_error(EFMD_LOG_UPDATE,
			    "failed to log_update %s", lp->log_tag);
		}

		fmd_free(buf, size);
		(void) ea_free_item(&item, EUP_ALLOC);
	}
}
/*
 * Rotate the specified log by renaming its underlying file to a staging file
 * that can be handed off to logadm(1M) or an administrator script.  If the
 * rename succeeds, open a new log file using the old path and return it.
 * Note that we are relying on our caller to use some higher-level mechanism
 * to ensure that fmd_log_rotate() cannot be called while other threads are
 * attempting fmd_log_append() using the same log (fmd's d_log_lock is used
 * for the global errlog and fltlog).
 */
fmd_log_t *
fmd_log_rotate(fmd_log_t *lp)
{
	char npath[PATH_MAX];
	fmd_log_t *nlp;

	(void) snprintf(npath, sizeof (npath), "%s+", lp->log_name);

	/*
	 * Open new log file.
	 */
	if ((nlp = fmd_log_open("", npath, lp->log_tag)) == NULL) {
		fmd_error(EFMD_LOG_ROTATE, "failed to open %s", npath);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	(void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name);
	(void) pthread_mutex_lock(&lp->log_lock);

	/*
	 * Check for any pending commits to drain before proceeding.  We can't
	 * rotate the log out if commits are pending because if we die after
	 * the log is moved aside, we won't be able to replay them on restart.
	 */
	if (lp->log_pending != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTBUSY);
		return (NULL);
	}

	/*
	 * Move the existing log aside to the staging name, then move the new
	 * log into place under the original name.
	 */
	if (rename(lp->log_name, npath) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	if (rename(nlp->log_name, lp->log_name) != 0) {
		(void) pthread_mutex_unlock(&lp->log_lock);
		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s",
		    nlp->log_name);
		(void) unlink(nlp->log_name);
		fmd_log_rele(nlp);
		(void) fmd_set_errno(EFMD_LOG_ROTATE);
		return (NULL);
	}

	/*
	 * Change name of new log file
	 */
	fmd_strfree(nlp->log_name);
	nlp->log_name = fmd_strdup(lp->log_name, FMD_SLEEP);

	/*
	 * If we've rotated the log, no pending events exist so we don't have
	 * any more commits coming, and our caller should have arranged for
	 * no more calls to append.  As such, we can close log_fd for good.
	 */
	if (lp->log_flags & FMD_LF_EAOPEN) {
		(void) ea_close(&lp->log_ea);
		lp->log_flags &= ~FMD_LF_EAOPEN;
	}

	(void) close(lp->log_fd);
	lp->log_fd = -1;

	(void) pthread_mutex_unlock(&lp->log_lock);
	return (nlp);
}
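
/*
 * Illustrative caller sketch, not part of fmd: rotating the global error log
 * under a daemon-wide writer lock as the block comment above fmd_log_rotate()
 * suggests, then installing the new log and releasing the old one.  The
 * rwlock type of fmd.d_log_lock and the presence of fmd.d_errlog are
 * assumptions here; a real caller would use whatever serialization fmd
 * actually provides, and the helper name is hypothetical.
 */
#ifdef FMD_LOG_ROTATE_EXAMPLE
static void
fmd_log_rotate_example(void)
{
	fmd_log_t *nlp;

	(void) pthread_rwlock_wrlock(&fmd.d_log_lock);

	if ((nlp = fmd_log_rotate(fmd.d_errlog)) != NULL) {
		fmd_log_rele(fmd.d_errlog);	/* drop the old log */
		fmd.d_errlog = nlp;		/* install the new log */
	}

	(void) pthread_rwlock_unlock(&fmd.d_log_lock);
}
#endif	/* FMD_LOG_ROTATE_EXAMPLE */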