4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
31 #include <sys/sysmacros.h>
32 #include <sys/cmn_err.h>
33 #include <sys/varargs.h>
37 #include <sys/t_lock.h>
39 #include <sys/debug.h>
41 #include <sys/lockfs.h>
42 #include <sys/fs/ufs_fs.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_panic.h>
45 #include <sys/fs/ufs_lockfs.h>
46 #include <sys/fs/ufs_trans.h>
47 #include <sys/fs/ufs_mount.h>
48 #include <sys/fs/ufs_prot.h>
49 #include <sys/fs/ufs_bio.h>
50 #include <sys/pathname.h>
51 #include <sys/utsname.h>
/*
 * Absolute value of x.  The argument appears more than once in the
 * expansion, so it must be free of side effects.
 */
#define	abs(x)	((x) < 0 ? -(x) : (x))
/*
 * Layout of the debug control word ufs_fix_failure_dbg:
 *	low four bits	- behaviour flags (DBGFLG_*)
 *	remaining bits	- verbosity level (DBGLVL_*)
 */

/* verbosity levels */
#define	DBGLVL_NONE	0x00000000
#define	DBGLVL_MAJOR	0x00000100
#define	DBGLVL_MINOR	0x00000200
#define	DBGLVL_MINUTE	0x00000400
#define	DBGLVL_TRIVIA	0x00000800
#define	DBGLVL_HIDEOUS	0x00001000

/* behaviour flags */
#define	DBGFLG_NONE		0x00000000
#define	DBGFLG_NOPANIC		0x00000001	/* real_panic_v warns only */
#define	DBGFLG_LVLONLY		0x00000002	/* DCALL matches level bits */
#define	DBGFLG_FIXWOULDPANIC	0x00000004	/* tested by ufs_fault_v */

/*
 * Masks separating the two fields.  The level mask is parenthesized so the
 * expansion stays a single operand wherever the macro is used.
 */
#define	DBGFLG_FLAGMASK		0x0000000F
#define	DBGFLG_LEVELMASK	(~DBGFLG_FLAGMASK)

/* accessors for the two fields of ufs_fix_failure_dbg */
#define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
#define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)
77 unsigned int ufs_fix_failure_dbg
= DBGLVL_NONE
| DBGFLG_NONE
;
79 #define DCALL(dbg_level, call) \
81 if (DEBUG_LEVEL != DBGLVL_NONE) { \
82 if (DEBUG_FLAGS & DBGFLG_LVLONLY) { \
83 if (DEBUG_LEVEL & dbg_level) { \
87 if (dbg_level <= DEBUG_LEVEL) { \
94 #define DPRINTF(dbg_level, msg) DCALL(dbg_level, printf msg)
96 #define MAJOR(msg) DPRINTF(DBGLVL_MAJOR, msg)
97 #define MINOR(msg) DPRINTF(DBGLVL_MINOR, msg)
98 #define MINUTE(msg) DPRINTF(DBGLVL_MINUTE, msg)
99 #define TRIVIA(msg) DPRINTF(DBGLVL_TRIVIA, msg)
100 #define HIDEOUS(msg) DPRINTF(DBGLVL_HIDEOUS, msg)
104 #define DCALL(ignored_dbg_level, ignored_routine)
105 #define MAJOR(ignored)
106 #define MINOR(ignored)
107 #define MINUTE(ignored)
108 #define TRIVIA(ignored)
109 #define HIDEOUS(ignored)
/*
 * Printable form of a string: "<null>" when str is a null pointer or an
 * empty string, otherwise str itself.  str is expanded more than once.
 */
#define	NULLSTR(str)	((!(str) || *(str) == '\0') ? "<null>" : (str))
116 /* somewhat arbitrary limits, in seconds */
117 /* all probably ought to be different, but these are convenient for debugging */
/* longest time to wait for fsck to start */
const time_t UF_TOO_LONG = 128;

/* retry periods, all in seconds, used while ... */
const time_t UF_FIXSTART_PERIOD = 16;		/* ... awaiting fsck start */
const time_t UF_FIXPOLL_PERIOD = 256;		/* ... awaiting fsck finish */
const time_t UF_SHORT_ERROR_PERIOD = 4;		/* ... after (lockfs) error */
const time_t UF_LONG_ERROR_PERIOD = 512;	/* ... after (lockfs) error */
/*
 * Lock value one past LOCKFS_MAXLOCK (named "Old Lock" in the lock
 * description table).  The expansion is parenthesized so that it behaves
 * as a single value next to higher-precedence operators.
 */
#define	LOCKFS_OLOCK	(LOCKFS_MAXLOCK + 1)
129 const ulong_t GB
= 1024 * 1024 * 1024;
130 const ulong_t SecondsPerGig
= 1024; /* ~17 minutes (overestimate) */
133 * per filesystem flags
135 const int UFSFX_PANIC
= (UFSMNT_ONERROR_PANIC
>> 4);
136 const int UFSFX_LCKONLY
= (UFSMNT_ONERROR_LOCK
>> 4);
137 const int UFSFX_LCKUMOUNT
= (UFSMNT_ONERROR_UMOUNT
>> 4);
138 const int UFSFX_DEFAULT
= (UFSMNT_ONERROR_DEFAULT
>> 4);
139 const int UFSFX_REPAIR_START
= 0x10000000;
141 /* return protocols */
143 typedef enum triage_return_code
{
149 typedef enum statefunc_return_code
{
154 /* external references */
155 /* in ufs_thread.c */
156 extern int ufs_thread_run(struct ufs_q
*, callb_cpr_t
*cprinfop
);
157 extern int ufs_checkaccton(vnode_t
*); /* in ufs_lockfs.c */
158 extern int ufs_checkswapon(vnode_t
*); /* in ufs_lockfs.c */
160 extern struct pollhead ufs_pollhd
; /* in ufs_vnops.c */
163 struct ufs_q ufs_fix
;
166 * patchable constants:
167 * These are set in ufsfx_init() [called at modload]
169 struct ufs_failure_tunable
{
170 long uft_too_long
; /* limit repair startup time */
171 long uft_fixstart_period
; /* pre-repair start period */
172 long uft_fixpoll_period
; /* post-fsck start period */
173 long uft_short_err_period
; /* post-error short period */
174 long uft_long_err_period
; /* post-error long period */
177 /* internal statistics of events */
178 struct uf_statistics
{
179 ulong_t ufst_lock_violations
;
180 ulong_t ufst_current_races
;
181 ulong_t ufst_unmount_failures
;
182 ulong_t ufst_num_fixed
;
183 ulong_t ufst_num_failed
;
184 ulong_t ufst_cpu_waste
;
185 time_t ufst_last_start_tm
;
189 typedef enum state_action
{
190 UFA_ERROR
= -1, /* internal error */
191 UFA_FOUND
, /* found uf in state */
192 UFA_SET
/* change uf to state */
195 /* state definition */
196 typedef struct uf_state_desc
{
197 int ud_v
; /* value */
198 char *ud_name
; /* name */
199 sfrc_t (*ud_sfp
)(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
200 /* per-state actions */
201 ufs_failure_states_t ud_prev
; /* valid prev. states */
203 struct uf_state_desc_attr
{
204 unsigned terminal
:1; /* no action req. if found */
205 unsigned at_fail
:1; /* state set by thread */
206 /* encountering the error */
215 /* thread to watch for failures */
static int ufsfx_do_failure_q(void);
static void ufsfx_thread_fix_failures(void *ignored);
static void ufsfx_kill_fix_failure_thread(void *arg);
220 /* routines called when failure occurs */
221 static int ufs_fault_v(vnode_t
*, char *, va_list)
223 static ufs_failure_t
*init_failure(vnode_t
*, char *, va_list)
225 static void queue_failure(ufs_failure_t
*);
227 static void real_panic(ufs_failure_t
*, const char *, ...)
229 static void real_panic_v(ufs_failure_t
*, const char *, va_list)
231 static triage_t
triage(vnode_t
*);
233 /* routines called when failure record is acted upon */
234 static sfrc_t
set_state(ufs_failure_t
*, ufs_failure_states_t
);
235 static int state_trans_valid(ufs_failure_states_t
, ufs_failure_states_t
);
236 static int terminal_state(ufs_failure_states_t
);
238 /* routines called when states entered/found */
239 static sfrc_t
sf_minimum(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
240 static sfrc_t
sf_undef(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
241 static sfrc_t
sf_init(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
242 static sfrc_t
sf_queue(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
243 static sfrc_t
sf_found_queue(ufs_failure_t
*);
244 static sfrc_t
sf_nonterm_cmn(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
245 static sfrc_t
sf_term_cmn(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
246 static sfrc_t
sf_panic(ufs_failure_t
*, ufsa_t
, ufs_failure_states_t
);
247 static sfrc_t
sf_set_trylck(ufs_failure_t
*);
248 static sfrc_t
sf_set_locked(ufs_failure_t
*);
249 static sfrc_t
sf_found_trylck(ufs_failure_t
*);
250 static sfrc_t
sf_found_lock_fix_cmn(ufs_failure_t
*, ufs_failure_states_t
);
251 static sfrc_t
sf_found_umount(ufs_failure_t
*);
253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
254 static time_t trylock_time_exceeded(ufs_failure_t
*);
255 static void pester_msg(ufs_failure_t
*, int);
256 static int get_lockfs_status(ufs_failure_t
*, struct lockfs
*);
257 static void alloc_lockfs_comment(ufs_failure_t
*, struct lockfs
*);
258 static int set_lockfs(ufs_failure_t
*, struct lockfs
*);
259 static int lockfs_failure(ufs_failure_t
*);
260 static int lockfs_success(ufs_failure_t
*);
261 static int fsck_active(ufs_failure_t
*);
263 /* low-level support routines */
264 static ufsd_t
*get_state_desc(ufs_failure_states_t
);
265 static char *fs_name(ufs_failure_t
*);
268 static char *state_name(ufs_failure_states_t
);
269 static char *lock_name(struct lockfs
*);
270 static char *err_name(int);
271 static char *act_name(ufsa_t
);
272 static void dump_uf_list(char *msg
);
273 static void dump_uf(ufs_failure_t
*, int i
);
280 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK)
281 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
283 * The only difference between these two is that the fsck must be started
286 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT)
287 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
289 * if flagged to panic: (UFSMNT_ONERROR_PANIC)
290 * UNDEF -> INIT -> PANIC
292 * if a secondary panic on a file system which has an active failure
294 * UNDEF -> INIT -> QUEUE -> REPLICA
296 * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
297 * All other states (except possibly PANIC) are set by the monitor
302 ufsd_t state_desc
[] =
304 { UF_ILLEGAL
, "in an unknown state", sf_minimum
, UF_ILLEGAL
,
306 { UF_UNDEF
, "undefined", sf_undef
, UF_UNDEF
,
308 { UF_INIT
, "being initialized", sf_init
, UF_UNDEF
,
310 { UF_QUEUE
, "queued", sf_queue
, UF_INIT
,
312 { UF_TRYLCK
, "trying to be locked", sf_nonterm_cmn
,
313 UF_QUEUE
, { 0, 0, 0 } },
314 { UF_LOCKED
, "locked", sf_nonterm_cmn
,
315 UF_TRYLCK
| UF_FIXING
, { 0, 0, 0 } },
316 { UF_UMOUNT
, "being unmounted", sf_nonterm_cmn
,
321 UF_TRYLCK
| UF_LOCKED
, { 0, 0, 0 } },
322 { UF_FIXING
, "being fixed", sf_nonterm_cmn
,
323 UF_LOCKED
, { 0, 0, 0 } },
324 { UF_FIXED
, "fixed", sf_term_cmn
,
325 UF_FIXING
, { 1, 0, 0 } },
326 { UF_NOTFIX
, "not fixed", sf_term_cmn
,
332 UF_QUEUE
| UF_TRYLCK
| UF_LOCKED
| UF_UMOUNT
| UF_FIXING
,
334 { UF_REPLICA
, "a replica", sf_term_cmn
,
335 UF_QUEUE
, { 1, 0, 0 } },
336 { UF_PANIC
, "panicking", sf_panic
,
337 /* XXX make this narrower */ UF_ALLSTATES
, { 0, 0, 0 } },
338 { UF_UNDEF
, NULL
, ((sfrc_t (*)()) NULL
),
339 UF_UNDEF
, { 0, 0, 0 } }
342 /* unified collection */
344 struct uf_statistics
*ufi_statp
;
345 struct ufs_failure_tunable
*ufi_tunep
;
346 ufsd_t
*ufi_statetab
;
350 struct action_description
{
357 struct error_description
{
362 { EUNK
, "<unexpected errno?>" },
363 { EINVAL
, "EINVAL" },
364 { EACCES
, "EACCES" },
367 { EDEADLK
, "EDEADLK" },
369 { EAGAIN
, "EAGAIN" },
370 { ERESTART
, "ERESTART" },
371 { ETIMEDOUT
, "ETIMEDOUT" },
376 struct action_description act_desc
[] =
378 { UFA_ERROR
, "<unexpected action?>" },
379 { UFA_FOUND
, "\"found\"" },
380 { UFA_SET
, "\"set\"" },
384 #define LOCKFS_BADLOCK (-1)
386 struct lock_description
{
391 { LOCKFS_BADLOCK
, "<unexpected lock?>" },
392 { LOCKFS_ULOCK
, "Unlock" },
393 { LOCKFS_ELOCK
, "Error Lock" },
394 { LOCKFS_HLOCK
, "Hard Lock" },
395 { LOCKFS_OLOCK
, "Old Lock" },
396 { LOCKFS_BADLOCK
, NULL
}
402 * ufs_fault, ufs_fault_v
404 * called instead of cmn_err(CE_PANIC, ...) by ufs routines
405 * when a failure is detected to put the file system into an
406 * error state (if possible) or to devolve to a panic otherwise
408 * vnode is some vnode in this file system, used to find the way
409 * to ufsvfs, vfsp etc. Since a panic can be called from many
410 * levels, the vnode is the most convenient hook to pass through.
416 ufs_fault(vnode_t
*vp
, char *fmt
, ...)
421 MINOR(("[ufs_fault"));
424 error
= ufs_fault_v(vp
, fmt
, adx
);
427 MINOR((": %s (%d)]\n", err_name(error
), error
));
431 const char *nullfmt
= "<null format?>";
434 ufs_fault_v(vnode_t
*vp
, char *fmt
, va_list adx
)
436 ufs_failure_t
*new = NULL
;
442 MINOR(("[ufs_fault_v"));
445 fmt
= (char *)nullfmt
;
450 ufsvfsp
= (struct ufsvfs
*)vp
->v_vfsp
->vfs_data
;
453 * Something bad has happened. That is why we are here.
455 * In order for the bad thing to be recorded in the superblock
456 * we need to write to the superblock directly.
457 * In the case that logging is enabled the logging code
458 * would normally intercept our write as a delta to the log,
459 * thus we mark the filesystem FSBAD in any case.
461 need_vfslock
= !MUTEX_HELD(&ufsvfsp
->vfs_lock
);
464 mutex_enter(&ufsvfsp
->vfs_lock
);
467 ufsvfsp
->vfs_fs
->fs_clean
= FSBAD
;
468 ASSERT(SEMA_HELD(&ufsvfsp
->vfs_bufp
->b_sem
));
469 ufsvfsp
->vfs_bufp
->b_flags
&=
470 ~(B_ASYNC
| B_READ
| B_DONE
| B_ERROR
| B_DELWRI
);
472 (void) bdev_strategy(ufsvfsp
->vfs_bufp
);
473 (void) biowait(ufsvfsp
->vfs_bufp
);
476 mutex_exit(&ufsvfsp
->vfs_lock
);
484 case TRIAGE_NO_SPIRIT
:
486 real_panic_v(new, fmt
, adx
);
487 /* LINTED: warning: logical expression always true: op "||" */
492 if (!(DEBUG_FLAGS
& DBGFLG_FIXWOULDPANIC
)) {
502 case TRIAGE_ATTEND_TO
:
504 /* q thread not running yet? */
505 if (mutex_tryenter(&ufs_fix
.uq_mutex
)) {
506 if (!ufs_fix
.uq_threadp
) {
507 mutex_exit(&ufs_fix
.uq_mutex
);
508 ufs_thread_start(&ufs_fix
,
509 ufsfx_thread_fix_failures
, NULL
);
510 ufs_fix
.uq_threadp
->t_flag
|= T_DONTBLOCK
;
511 mutex_enter(&ufs_fix
.uq_mutex
);
514 * We got the lock but we are not the current
515 * threadp so we have to release the lock.
517 mutex_exit(&ufs_fix
.uq_mutex
);
520 MINOR((": fix failure thread already running "));
522 * No need to log another failure as one is already
528 if (ufs_fix
.uq_threadp
&& ufs_fix
.uq_threadp
== curthread
) {
529 mutex_exit(&ufs_fix
.uq_mutex
);
530 cmn_err(CE_WARN
, "ufs_fault_v: recursive ufs_fault");
533 * Must check if we actually still own the lock and
534 * if so then release the lock and move on with life.
536 if (mutex_owner(&ufs_fix
.uq_mutex
) == curthread
)
537 mutex_exit(&ufs_fix
.uq_mutex
);
540 new = init_failure(vp
, fmt
, adx
);
545 real_panic_v(new, fmt
, adx
);
556 * Attempt to fix iff:
557 * - the system is not already panicking
558 * - this file system isn't explicitly marked not to be fixed
559 * - we can connect to the user-level daemon
560 * These conditions are detectable later, but if we can determine
561 * them in the failing threads context the core dump may be more
577 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr
));
578 return (TRIAGE_DEAD
);
581 if (!vp
|| !(ip
= VTOI(vp
)) || !ip
->i_ufsvfs
) {
583 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
584 return (TRIAGE_DEAD
);
587 /* use tryenter and continue no matter what since we're panicky */
588 need_unlock_vfs
= !MUTEX_HELD(&ip
->i_ufsvfs
->vfs_lock
);
590 need_unlock_vfs
= mutex_tryenter(&ip
->i_ufsvfs
->vfs_lock
);
592 fs_flags
= ip
->i_ufsvfs
->vfs_fsfx
.fx_flags
;
594 mutex_exit(&ip
->i_ufsvfs
->vfs_lock
);
596 if (fs_flags
& UFSFX_PANIC
) {
598 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
599 return (TRIAGE_NO_SPIRIT
);
602 if (ufs_checkaccton(vp
) != 0) {
604 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
605 return (TRIAGE_DEAD
);
608 if (ufs_checkswapon(vp
) != 0) {
610 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
611 return (TRIAGE_DEAD
);
614 MINUTE((": return TRIAGE_ATTEND_TO] "));
615 return (TRIAGE_ATTEND_TO
);
621 * This routine allocates a failure struct and initializes
622 * its member elements.
623 * Space is allocated for copies of dynamic identifying fs structures
624 * passed in. Without a much more segmented kernel architecture
625 * this is as protected as we can make it (for now.)
627 static ufs_failure_t
*
628 init_failure(vnode_t
*vp
, char *fmt
, va_list adx
)
632 int initialization_worked
= 0;
635 MINOR(("[init_failure"));
637 new = kmem_zalloc(sizeof (ufs_failure_t
), KM_NOSLEEP
);
639 MINOR((": kmem_zalloc failed]\n"));
644 * enough information to make a fix attempt possible?
646 if (!vp
|| !(ip
= VTOI(vp
)) || !ip
->i_ufsvfs
|| !vp
->v_vfsp
||
647 !ip
->i_ufsvfs
->vfs_bufp
|| !ITOF(ip
) || !fmt
)
650 if (vp
->v_type
!= VREG
&& vp
->v_type
!= VDIR
&&
651 vp
->v_type
!= VBLK
&& vp
->v_type
!= VCHR
&&
652 vp
->v_type
!= VLNK
&& vp
->v_type
!= VFIFO
&&
656 if (ip
->i_ufsvfs
->vfs_root
->v_type
!= VREG
&&
657 ip
->i_ufsvfs
->vfs_root
->v_type
!= VDIR
&&
658 ip
->i_ufsvfs
->vfs_root
->v_type
!= VBLK
&&
659 ip
->i_ufsvfs
->vfs_root
->v_type
!= VCHR
&&
660 ip
->i_ufsvfs
->vfs_root
->v_type
!= VLNK
&&
661 ip
->i_ufsvfs
->vfs_root
->v_type
!= VFIFO
&&
662 ip
->i_ufsvfs
->vfs_root
->v_type
!= VSOCK
)
665 if ((ITOF(ip
)->fs_magic
!= FS_MAGIC
) &&
666 (ITOF(ip
)->fs_magic
!= MTB_UFS_MAGIC
))
669 /* initialize values */
671 (void) vsnprintf(new->uf_panic_str
, LOCKFS_MAXCOMMENTLEN
- 1, fmt
, adx
);
673 new->uf_ufsvfsp
= ip
->i_ufsvfs
;
674 new->uf_vfsp
= ip
->i_vfs
;
676 mutex_init(&new->uf_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
677 need_vfs_unlock
= !MUTEX_HELD(&ip
->i_ufsvfs
->vfs_lock
);
679 if (need_vfs_unlock
) {
680 if (!mutex_tryenter(&ip
->i_ufsvfs
->vfs_lock
)) {
682 * not much alternative here, but we're panicking
683 * already, it couldn't be worse - so just
684 * proceed optimistically and take note.
686 mutex_enter(&uf_stats
.ufst_mutex
);
687 uf_stats
.ufst_lock_violations
++;
688 mutex_exit(&uf_stats
.ufst_mutex
);
689 MINOR((": couldn't get vfs lock"))
694 if (mutex_tryenter(&new->uf_mutex
)) {
695 initialization_worked
= set_state(new, UF_INIT
);
696 mutex_exit(&new->uf_mutex
);
700 mutex_exit(&ip
->i_ufsvfs
->vfs_lock
);
702 if (initialization_worked
) {
710 kmem_free(new, sizeof (ufs_failure_t
));
711 MINOR((": failed]\n"));
716 queue_failure(ufs_failure_t
*new)
718 MINOR(("[queue_failure"));
720 mutex_enter(&ufs_fix
.uq_mutex
);
722 if (ufs_fix
.uq_ufhead
)
723 insque(new, &ufs_fix
.uq_ufhead
);
725 ufs_fix
.uq_ufhead
= new;
727 if (mutex_tryenter(&new->uf_mutex
)) {
728 (void) set_state(new, UF_QUEUE
);
729 mutex_exit(&new->uf_mutex
);
732 mutex_enter(&uf_stats
.ufst_mutex
); /* force wakeup */
733 ufs_fix
.uq_ne
= ufs_fix
.uq_lowat
= uf_stats
.ufst_num_failed
;
734 mutex_exit(&uf_stats
.ufst_mutex
);
736 cv_broadcast(&ufs_fix
.uq_cv
);
738 DCALL(DBGLVL_MAJOR
, cmn_err(CE_WARN
, new->uf_panic_str
?
739 new->uf_panic_str
: "queue_failure: NULL panic str?"));
740 mutex_exit(&ufs_fix
.uq_mutex
);
747 real_panic(ufs_failure_t
*f
, const char *fmt
, ...)
751 MINUTE(("[real_panic "));
754 real_panic_v(f
, fmt
, adx
);
757 MINUTE((": return?!]\n"));
761 real_panic_v(ufs_failure_t
*f
, const char *fmt
, va_list adx
)
763 int seriousness
= CE_PANIC
;
766 MINUTE(("[real_panic_v "));
768 if (f
&& f
->uf_ufsvfsp
)
769 TRANS_SETERROR(f
->uf_ufsvfsp
);
772 if (DEBUG_FLAGS
& DBGFLG_NOPANIC
) {
773 seriousness
= CE_WARN
;
774 cmn_err(CE_WARN
, "real_panic: EWOULDPANIC\n");
778 ddi_msleep(500); /* allow previous warnings to get out */
781 vcmn_err(seriousness
, fmt
, adx
);
783 cmn_err(seriousness
, f
&& f
->uf_panic_str
? f
->uf_panic_str
:
784 "real_panic: <unknown panic?>");
787 need_unlock
= !MUTEX_HELD(&f
->uf_mutex
);
789 mutex_enter(&f
->uf_mutex
);
793 (void) set_state(f
, UF_PANIC
);
796 mutex_exit(&f
->uf_mutex
);
799 MINUTE((": return?!]\n"));
803 * initializes ufs panic structs, locks, etc
809 MINUTE(("[ufsfx_init"));
811 /* patchable; unchanged while running, so no lock is needed */
812 ufsfx_tune
.uft_too_long
= UF_TOO_LONG
;
813 ufsfx_tune
.uft_fixstart_period
= UF_FIXSTART_PERIOD
;
814 ufsfx_tune
.uft_fixpoll_period
= UF_FIXPOLL_PERIOD
;
815 ufsfx_tune
.uft_short_err_period
= UF_SHORT_ERROR_PERIOD
;
816 ufsfx_tune
.uft_long_err_period
= UF_LONG_ERROR_PERIOD
;
818 uffsinfo
.ufi_statp
= &uf_stats
;
819 uffsinfo
.ufi_tunep
= &ufsfx_tune
;
820 uffsinfo
.ufi_statetab
= &state_desc
[0];
822 mutex_init(&uf_stats
.ufst_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
823 ufs_thread_init(&ufs_fix
, /* maxne */ 1);
829 * initializes per-ufs values
830 * returns 0 (ok) or errno
833 ufsfx_mount(struct ufsvfs
*ufsvfsp
, int flags
)
835 MINUTE(("[ufsfx_mount (%d)", flags
));
836 /* don't check/need vfs_lock because it's still being initialized */
838 ufsvfsp
->vfs_fsfx
.fx_flags
= (flags
& UFSMNT_ONERROR_FLGMASK
) >> 4;
840 MINUTE((": %s: fx_flags:%ld,",
841 ufsvfsp
->vfs_fs
->fs_fsmnt
, ufsvfsp
->vfs_fsfx
.fx_flags
));
843 * onerror={panic ^ lock only ^ unmount}
846 if (ufsvfsp
->vfs_fsfx
.fx_flags
& UFSFX_PANIC
) {
849 } else if (ufsvfsp
->vfs_fsfx
.fx_flags
& UFSFX_LCKONLY
) {
850 MINUTE((" LCKONLY"));
852 } else if (ufsvfsp
->vfs_fsfx
.fx_flags
& UFSFX_LCKUMOUNT
) {
853 MINUTE((" LCKUMOUNT"));
856 ufsvfsp
->vfs_fsfx
.fx_flags
= UFSFX_DEFAULT
;
857 ASSERT(ufsvfsp
->vfs_fsfx
.fx_flags
&
858 (UFSMNT_ONERROR_FLGMASK
>> 4));
859 MINUTE((" DEFAULT"));
862 pollwakeup(&ufs_pollhd
, POLLPRI
);
870 * called during unmount
873 ufsfx_unmount(struct ufsvfs
*ufsvfsp
)
876 int must_unlock_list
;
878 MINUTE(("[ufsfx_unmount"));
881 MINUTE((": no ufsvfsp]"));
885 if ((must_unlock_list
= !MUTEX_HELD(&ufs_fix
.uq_mutex
)) != 0)
886 mutex_enter(&ufs_fix
.uq_mutex
);
888 for (f
= ufs_fix
.uq_ufhead
; f
; f
= f
->uf_next
) {
889 int must_unlock_failure
;
891 must_unlock_failure
= !MUTEX_HELD(&f
->uf_mutex
);
892 if (must_unlock_failure
) {
893 mutex_enter(&f
->uf_mutex
);
896 if (f
->uf_ufsvfsp
== ufsvfsp
) {
899 * if we owned the failure record lock, then this
900 * is probably a fix failure-triggered unmount, so
901 * the warning is not appropriate or needed
904 /* XXX if rebooting don't print this? */
905 if (!terminal_state(f
->uf_s
) && must_unlock_failure
) {
907 "Unmounting %s while error-locked",
911 f
->uf_ufsvfsp
= NULL
;
912 f
->uf_vfs_ufsfxp
= NULL
;
913 f
->uf_vfs_lockp
= NULL
;
919 if (must_unlock_failure
)
920 mutex_exit(&f
->uf_mutex
);
922 if (must_unlock_list
)
923 mutex_exit(&ufs_fix
.uq_mutex
);
925 pollwakeup(&ufs_pollhd
, POLLPRI
| POLLHUP
);
932 * provides hook from lockfs code so we can recognize unlock/relock
933 * This is called after it is certain that the (un)lock will succeed.
936 ufsfx_unlockfs(struct ufsvfs
*ufsvfsp
)
940 int need_unlock_list
;
943 MINUTE(("[ufsfx_unlockfs"));
948 need_unlock_list
= !MUTEX_HELD(&ufs_fix
.uq_mutex
);
950 if (need_unlock_list
)
951 mutex_enter(&ufs_fix
.uq_mutex
);
953 for (f
= ufs_fix
.uq_ufhead
; f
; f
= f
->uf_next
) {
955 need_unlock
= !MUTEX_HELD(&f
->uf_mutex
);
957 mutex_enter(&f
->uf_mutex
);
959 if (f
->uf_ufsvfsp
== ufsvfsp
&& !terminal_state(f
->uf_s
)) {
960 if (!(f
->uf_s
& UF_FIXING
)) {
962 * This might happen if we don't notice that
963 * the fs gets marked FSFIX before it is
964 * marked FSCLEAN, as might occur if the
965 * superblock was hammered directly.
970 "Unlock of %s succeeded before "
971 "fs_clean marked FSFIX?",
976 * pass through fixing state so
977 * transition protocol is satisfied
979 if (!set_state(f
, UF_FIXING
)) {
980 MINUTE((": failed] "));
984 if (!set_state(f
, UF_FIXED
)) {
985 /* it's already fixed, so don't panic now */
986 MINUTE((": failed] "));
991 mutex_exit(&f
->uf_mutex
);
993 if (need_unlock_list
)
994 mutex_exit(&ufs_fix
.uq_mutex
);
999 ufsfx_lockfs(struct ufsvfs
*ufsvfsp
)
1003 int need_unlock_list
;
1005 MINUTE(("[ufsfx_lockfs"));
1010 need_unlock_list
= !MUTEX_HELD(&ufs_fix
.uq_mutex
);
1012 if (need_unlock_list
)
1013 mutex_enter(&ufs_fix
.uq_mutex
);
1015 for (f
= ufs_fix
.uq_ufhead
; f
; f
= f
->uf_next
) {
1017 need_unlock
= !MUTEX_HELD(&f
->uf_mutex
);
1019 mutex_enter(&f
->uf_mutex
);
1021 if (f
->uf_ufsvfsp
== ufsvfsp
&& !terminal_state(f
->uf_s
) &&
1022 f
->uf_s
!= UF_PANIC
) {
1027 "fs %s not in state "
1028 "UF_TRYLCK, UF_LOCKED or UF_FIXING",
1033 if (!set_state(f
, UF_LOCKED
)) {
1034 MINUTE((": failed] "));
1039 if (!set_state(f
, UF_FIXING
)) {
1040 MINUTE((": failed] "));
1051 mutex_exit(&f
->uf_mutex
);
1053 if (need_unlock_list
)
1054 mutex_exit(&ufs_fix
.uq_mutex
);
1060 * error lock, trigger fsck and unlock those fs with failures
1061 * blatantly copied from the hlock routine, although this routine
1062 * triggers differently in order to use uq_ne as meaningful data.
1066 ufsfx_thread_fix_failures(void *ignored
)
1069 callb_cpr_t cprinfo
;
1071 CALLB_CPR_INIT(&cprinfo
, &ufs_fix
.uq_mutex
, callb_generic_cpr
,
1074 MINUTE(("[ufsfx_thread_fix_failures] "));
1077 /* sleep until there is work to do */
1079 mutex_enter(&ufs_fix
.uq_mutex
);
1080 (void) ufs_thread_run(&ufs_fix
, &cprinfo
);
1082 mutex_exit(&ufs_fix
.uq_mutex
);
1084 /* process failures on our q */
1086 retry
= ufsfx_do_failure_q();
1088 mutex_enter(&ufs_fix
.uq_mutex
);
1089 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
1090 (void) cv_reltimedwait(&ufs_fix
.uq_cv
,
1091 &ufs_fix
.uq_mutex
, (hz
* retry
),
1093 CALLB_CPR_SAFE_END(&cprinfo
,
1095 mutex_exit(&ufs_fix
.uq_mutex
);
1104 * watch for fix-on-panic work
1106 * returns # of seconds to sleep before trying again
1107 * and zero if no retry is needed
1111 ufsfx_do_failure_q(void)
1117 MAJOR(("[ufsfx_do_failure_q"));
1118 DCALL(DBGLVL_HIDEOUS
, dump_uf_list(NULL
));
1120 if (!mutex_tryenter(&ufs_fix
.uq_mutex
))
1127 * walk down failure list
1128 * depending on state of each failure, do whatever
1129 * is appropriate to move it to the next state
1130 * taking note of whether retry gets set
1133 * wakeup in shortest required time for any failure
1134 * retry == 0; nothing more to do (terminal state)
1135 * retry < 0; reprocess queue immediately, retry will
1136 * be abs(retry) for the next cycle
1137 * retry > 0; schedule wakeup for retry seconds
1140 for (f
= ufs_fix
.uq_ufhead
; f
; f
= f
->uf_next
) {
1142 if (!mutex_tryenter(&f
->uf_mutex
)) {
1146 s
= get_state_desc(f
->uf_s
);
1148 MINOR((": found%s: %s, \"%s: %s\"\n",
1149 s
->ud_attr
.terminal
? " old" : "",
1150 fs_name(f
), state_name(f
->uf_s
), f
->uf_panic_str
));
1152 if (s
->ud_attr
.terminal
) {
1153 mutex_exit(&f
->uf_mutex
);
1158 (*s
->ud_sfp
)(f
, UFA_FOUND
, f
->uf_s
);
1160 ASSERT(terminal_state(f
->uf_s
) || f
->uf_retry
!= 0);
1162 if (f
->uf_retry
!= 0) {
1163 if (retry
> f
->uf_retry
|| retry
== 0)
1164 retry
= f
->uf_retry
;
1165 if (f
->uf_retry
< 0)
1166 f
->uf_retry
= abs(f
->uf_retry
);
1168 mutex_exit(&f
->uf_mutex
);
1177 mutex_exit(&ufs_fix
.uq_mutex
);
1179 DCALL(DBGLVL_HIDEOUS
, dump_uf_list(NULL
));
1180 MAJOR((": retry=%ld, good night]\n\n", retry
));
1186 pester_msg(ufs_failure_t
*f
, int seriousness
)
1188 MINUTE(("[pester_msg"));
1189 ASSERT(f
->uf_s
& (UF_LOCKED
| UF_FIXING
));
1192 * XXX if it seems too long for this fs, poke administrator
1193 * XXX to run fsck manually (and change retry time?)
1195 cmn_err(seriousness
, "Waiting for repair of %s to %s",
1196 fs_name(f
), f
->uf_s
& UF_LOCKED
? "start" : "finish");
1201 trylock_time_exceeded(ufs_failure_t
*f
)
1206 MINUTE(("[trylock_time_exceeded"));
1207 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
1209 toolong
= (time_t)ufsfx_tune
.uft_too_long
+ f
->uf_entered_tm
;
1211 cmn_err(CE_WARN
, "error-lock timeout exceeded: %s", fs_name(f
));
1214 return (time
<= toolong
? 0: time
- toolong
);
1218 get_lockfs_status(ufs_failure_t
*f
, struct lockfs
*lfp
)
1220 MINUTE(("[get_lockfs_status"));
1222 if (!f
->uf_ufsvfsp
) {
1223 MINUTE((": ufsvfsp is NULL]\n"));
1227 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
1228 ASSERT(MUTEX_NOT_HELD(f
->uf_vfs_lockp
));
1229 ASSERT(!vfs_lock_held(f
->uf_vfsp
));
1230 ASSERT(f
->uf_ufsvfsp
->vfs_root
!= NULL
);
1232 f
->uf_lf_err
= ufs_fiolfss(f
->uf_ufsvfsp
->vfs_root
, lfp
);
1235 f
->uf_retry
= ufsfx_tune
.uft_short_err_period
;
1243 set_state(ufs_failure_t
*f
, ufs_failure_states_t new_state
)
1246 sfrc_t sfrc
= SFRC_FAIL
;
1250 HIDEOUS(("[set_state: new state:%s", state_name(new_state
)));
1252 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
1255 * if someone else is panicking, just let panic sync proceed
1258 (void) set_state(f
, UF_NOTFIX
);
1259 HIDEOUS((": state reset: not fixed] "));
1264 * bad state transition, an internal error
1266 if (!state_trans_valid(f
->uf_s
, new_state
)) {
1268 if (!(f
->uf_s
& UF_PANIC
) && !(new_state
& UF_PANIC
))
1269 (void) set_state(f
, UF_PANIC
);
1270 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1271 state_name(f
->uf_s
), state_name(new_state
)));
1275 s
= get_state_desc(new_state
);
1277 need_unlock
= !MUTEX_HELD(&ufs_fix
.uq_mutex
);
1279 mutex_enter(&ufs_fix
.uq_mutex
);
1281 if (s
->ud_attr
.at_fail
&& ufs_fix
.uq_threadp
&&
1282 curthread
== ufs_fix
.uq_threadp
) {
1283 cmn_err(CE_WARN
, "set_state: probable recursive panic of %s",
1287 mutex_exit(&ufs_fix
.uq_mutex
);
1289 /* NULL state functions always succeed */
1290 sfrc
= !s
->ud_sfp
? SFRC_SUCCESS
: (*s
->ud_sfp
)(f
, UFA_SET
, new_state
);
1292 if (sfrc
== SFRC_SUCCESS
&& f
->uf_s
!= new_state
) {
1293 f
->uf_s
= new_state
;
1294 f
->uf_entered_tm
= time
;
1303 get_state_desc(ufs_failure_states_t state
)
1307 HIDEOUS(("[get_state_desc"));
1309 for (s
= &state_desc
[1]; s
->ud_name
!= NULL
; s
++) {
1310 if (s
->ud_v
== state
) {
1317 return (&state_desc
[0]); /* default */
1321 sf_undef(ufs_failure_t
*f
, ufsa_t a
, ufs_failure_states_t s
)
1325 TRIVIA(("[sf_undef, action is %s, state is %s\n",
1326 act_name(a
), state_name(s
)));
1327 ASSERT(s
== UF_UNDEF
);
1329 /* shouldn't find null failure records or ever set one */
1330 rc
= set_state(f
, UF_NOTFIX
);
1341 ufs_failure_states_t s
)
1343 sfrc_t rc
= SFRC_FAIL
;
1346 TRIVIA(("[sf_init, action is %s", act_name(a
)));
1347 ASSERT(s
& UF_INIT
);
1351 f
->uf_begin_tm
= time
;
1353 if (!f
->uf_ufsvfsp
) {
1354 (void) set_state(f
, UF_PANIC
);
1355 TRIVIA((": NULL ufsvfsp]\n"));
1359 * because we can call panic from many different levels,
1360 * we can't be sure that we've got the vfs_lock at this
1361 * point. However, there's not much alternative and if
1362 * we don't (have the lock) the worst case is we'll just
1365 f
->uf_vfs_lockp
= &f
->uf_ufsvfsp
->vfs_lock
;
1366 f
->uf_vfs_ufsfxp
= &f
->uf_ufsvfsp
->vfs_fsfx
;
1368 if (!f
->uf_ufsvfsp
->vfs_bufp
) {
1369 (void) set_state(f
, UF_PANIC
);
1370 TRIVIA((": NULL vfs_bufp]\n"));
1373 f
->uf_bp
= f
->uf_ufsvfsp
->vfs_bufp
;
1375 if (!f
->uf_ufsvfsp
->vfs_bufp
->b_un
.b_fs
) {
1376 (void) set_state(f
, UF_PANIC
);
1377 TRIVIA((": NULL vfs_fs]\n"));
1381 /* vfs_fs = vfs_bufp->b_un.b_fs */
1382 bcopy(f
->uf_ufsvfsp
->vfs_fs
->fs_fsmnt
, f
->uf_fsname
, MAXMNTLEN
);
1384 f
->uf_lf
.lf_lock
= LOCKFS_ELOCK
; /* primer */
1386 if (!f
->uf_vfsp
|| f
->uf_vfsp
->vfs_dev
== NODEV
) {
1387 (void) set_state(f
, UF_PANIC
);
1388 TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1391 f
->uf_dev
= f
->uf_vfsp
->vfs_dev
;
1398 /* failures marked init shouldn't even be on the queue yet */
1399 rc
= set_state(f
, UF_QUEUE
);
1400 TRIVIA((": found failure with state init]\n"));
1411 ufs_failure_states_t s
)
1413 sfrc_t rc
= SFRC_FAIL
;
1415 TRIVIA(("[sf_queue, action is %s", act_name(a
)));
1416 ASSERT(s
& UF_QUEUE
);
1418 if (!f
->uf_ufsvfsp
) {
1419 TRIVIA((": NULL ufsvfsp]\n"));
1425 rc
= sf_found_queue(f
);
1430 ASSERT(MUTEX_HELD(&ufs_fix
.uq_mutex
));
1432 mutex_enter(&uf_stats
.ufst_mutex
);
1433 uf_stats
.ufst_num_failed
++;
1434 mutex_exit(&uf_stats
.ufst_mutex
);
1437 * if can't get the vfs lock, just wait until
1438 * UF_TRYLCK to set fx_current
1440 if (mutex_tryenter(f
->uf_vfs_lockp
)) {
1441 f
->uf_vfs_ufsfxp
->fx_current
= f
;
1442 mutex_exit(f
->uf_vfs_lockp
);
1444 mutex_enter(&uf_stats
.ufst_mutex
);
1445 uf_stats
.ufst_current_races
++;
1446 mutex_exit(&uf_stats
.ufst_mutex
);
1455 (void) set_state(f
, UF_PANIC
);
1456 TRIVIA((": failed] "));
1463 sf_found_queue(ufs_failure_t
*f
)
1466 sfrc_t rc
= SFRC_FAIL
;
1468 TRIVIA(("[sf_found_queue"));
1471 * don't need to check for null ufsvfsp because
1472 * unmount must own list's ufs_fix.uq_mutex
1473 * to mark it null and we own that lock since
1477 ASSERT(MUTEX_HELD(&ufs_fix
.uq_mutex
));
1478 ASSERT(MUTEX_NOT_HELD(f
->uf_vfs_lockp
));
1480 if (!mutex_tryenter(f
->uf_vfs_lockp
)) {
1481 TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1486 replica
= f
->uf_vfs_ufsfxp
&& f
->uf_vfs_ufsfxp
->fx_current
!= NULL
&&
1487 f
->uf_vfs_ufsfxp
->fx_current
!= f
&&
1488 !terminal_state(f
->uf_vfs_ufsfxp
->fx_current
->uf_s
);
1491 * copy general flags to this ufs_failure so we don't
1492 * need to refer back to the ufsvfs, or, more importantly,
1493 * don't need to keep acquiring (trying to acquire) vfs_lockp
1495 * The most restrictive option wins:
1496 * panic > errlock only > errlock+unmount > repair
1497 * XXX panic > elock > elock > elock+umount
1499 if (f
->uf_vfs_ufsfxp
->fx_flags
& UFSFX_PANIC
) {
1500 if (!set_state(f
, UF_PANIC
)) {
1501 TRIVIA((": marked panic but was queued?"));
1505 mutex_exit(f
->uf_vfs_lockp
);
1508 f
->uf_flags
= f
->uf_vfs_ufsfxp
->fx_flags
;
1511 if (!set_state(f
, UF_REPLICA
)) {
1513 TRIVIA((": set to replica failed] "));
1517 mutex_exit(f
->uf_vfs_lockp
);
1520 mutex_exit(f
->uf_vfs_lockp
);
1522 if (!set_state(f
, UF_TRYLCK
)) {
1523 TRIVIA((": failed] "));
1531 sf_nonterm_cmn(ufs_failure_t
*f
, ufsa_t a
, ufs_failure_states_t s
)
1533 sfrc_t rc
= SFRC_FAIL
;
1535 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a
), state_name(s
)));
1536 ASSERT(s
& (UF_TRYLCK
| UF_LOCKED
| UF_UMOUNT
| UF_FIXING
));
1537 ASSERT(!terminal_state(s
));
1539 if (!f
->uf_ufsvfsp
&& !(f
->uf_s
& UF_UMOUNT
)) {
1540 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1541 (void) set_state(f
, UF_NOTFIX
);
1549 ASSERT(MUTEX_NOT_HELD(f
->uf_vfs_lockp
));
1550 rc
= sf_set_trylck(f
);
1554 rc
= sf_set_locked(f
);
1558 f
->uf_flags
|= UFSFX_REPAIR_START
;
1559 f
->uf_retry
= ufsfx_tune
.uft_fixpoll_period
;
1564 f
->uf_retry
= -ufsfx_tune
.uft_short_err_period
;
1569 (void) set_state(f
, UF_PANIC
);
1570 TRIVIA((": failed] "));
1578 rc
= sf_found_trylck(f
);
1583 rc
= sf_found_lock_fix_cmn(f
, s
);
1587 rc
= sf_found_umount(f
);
1591 (void) set_state(f
, UF_PANIC
);
1592 TRIVIA((": failed] "));
1597 (void) set_state(f
, UF_PANIC
);
1598 TRIVIA((": failed] "));
1607 sf_set_trylck(ufs_failure_t
*f
)
1609 TRIVIA(("[sf_set_trylck"));
1611 if (!mutex_tryenter(f
->uf_vfs_lockp
)) {
1612 TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1617 if (!f
->uf_vfs_ufsfxp
->fx_current
)
1618 f
->uf_vfs_ufsfxp
->fx_current
= f
;
1620 mutex_exit(f
->uf_vfs_lockp
);
1622 f
->uf_lf
.lf_flags
= 0;
1623 f
->uf_lf
.lf_lock
= LOCKFS_ELOCK
;
1624 f
->uf_retry
= -ufsfx_tune
.uft_fixstart_period
;
1626 return (SFRC_SUCCESS
);
1630 sf_found_trylck(ufs_failure_t
*f
)
1632 struct lockfs lockfs_status
;
1634 TRIVIA(("[sf_found_trylck"));
1636 if (trylock_time_exceeded(f
) > 0) {
1637 (void) set_state(f
, UF_PANIC
);
1638 TRIVIA((": failed] "));
1642 if (!get_lockfs_status(f
, &lockfs_status
)) {
1643 (void) set_state(f
, UF_PANIC
);
1644 TRIVIA((": failed] "));
1648 if (f
->uf_lf_err
== NO_ERROR
)
1649 f
->uf_lf
.lf_key
= lockfs_status
.lf_key
;
1651 if (!set_lockfs(f
, &lockfs_status
)) {
1652 (void) set_state(f
, UF_PANIC
);
1653 TRIVIA((": failed] "));
1657 return (SFRC_SUCCESS
);
1661 sf_set_locked(ufs_failure_t
*f
)
1663 TRIVIA(("[sf_set_locked"));
1665 f
->uf_retry
= -ufsfx_tune
.uft_fixstart_period
;
1668 if (f
->uf_flags
& UFSFX_REPAIR_START
)
1669 TRIVIA(("clearing UFSFX_REPAIR_START "));
1672 f
->uf_flags
&= ~UFSFX_REPAIR_START
;
1674 if (f
->uf_s
& UF_TRYLCK
) {
1675 cmn_err(CE_WARN
, "Error-locked %s: \"%s\"",
1676 fs_name(f
), f
->uf_panic_str
);
1678 if (f
->uf_flags
& UFSFX_LCKONLY
)
1679 cmn_err(CE_WARN
, "Manual repair of %s required",
1684 * just reset to current state
1687 TRIVIA(("locked->locked "));
1691 return (SFRC_SUCCESS
);
1695 sf_found_lock_fix_cmn(ufs_failure_t
*f
, ufs_failure_states_t s
)
1699 struct buf
*bp
= NULL
;
1701 time_t concerned
, anxious
;
1702 sfrc_t rc
= SFRC_FAIL
;
1705 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s
)));
1707 if (s
& UF_LOCKED
) {
1708 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
1711 time
> (ufsfx_tune
.uft_too_long
+ f
->uf_entered_tm
);
1712 TRIVIA(("%stoolong", !toolong
? "not": ""));
1713 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1714 time
, ufsfx_tune
.uft_too_long
, f
->uf_entered_tm
));
1716 if (f
->uf_flags
& UFSFX_LCKUMOUNT
) {
1717 if (set_state(f
, UF_UMOUNT
)) {
1721 TRIVIA((": failed] "));
1729 if (!(f
->uf_flags
& UFSFX_REPAIR_START
)) {
1730 cmn_err(CE_WARN
, "%s repair of %s not started.",
1731 (f
->uf_flags
& UFSFX_LCKONLY
) ?
1732 "Manual" : "Automatic", fs_name(f
));
1734 f
->uf_retry
= ufsfx_tune
.uft_long_err_period
;
1736 f
->uf_retry
= ufsfx_tune
.uft_long_err_period
;
1737 cmn_err(CE_WARN
, "Repair of %s is not timely; "
1738 "operator attention is required.",
1748 ASSERT(s
& UF_FIXING
);
1753 * get on disk superblock; force it to really
1754 * come from the disk
1756 (void) bfinval(f
->uf_dev
, 0);
1757 bp
= UFS_BREAD(f
->uf_ufsvfsp
, f
->uf_dev
, SBLOCK
, SBSIZE
);
1759 bp
->b_flags
|= (B_STALE
| B_AGE
);
1760 dfs
= bp
->b_un
.b_fs
;
1763 if (!bp
|| (bp
->b_flags
& B_ERROR
) || ((dfs
->fs_magic
!= FS_MAGIC
) &&
1764 (dfs
->fs_magic
!= MTB_UFS_MAGIC
))) {
1765 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1770 /* fsck started but we haven't noticed yet? */
1771 if (!(s
& UF_FIXING
) && dfs
->fs_clean
== FSFIX
) {
1772 if (!set_state(f
, UF_FIXING
)) {
1773 TRIVIA((": failed]\n"));
1779 /* fsck started but didn't succeed? */
1780 if ((s
& UF_FIXING
) && ((dfs
->fs_clean
== FSBAD
) || !fsck_active(f
))) {
1781 TRIVIA((": fs_clean: %d", (int)dfs
->fs_clean
));
1782 (void) set_state(f
, UF_LOCKED
);
1783 cmn_err(CE_WARN
, "%s: Manual repair is necessary.", fs_name(f
));
1784 f
->uf_retry
= ufsfx_tune
.uft_long_err_period
;
1788 gb_size
= (dfs
->fs_size
* dfs
->fs_bshift
) / GB
;
1789 toolong
= (time_t)((gb_size
== 0? 1: gb_size
) * SecondsPerGig
);
1791 /* fsck started but doesn't seem to be proceeding? */
1792 if ((s
& UF_FIXING
) && dfs
->fs_clean
== FSFIX
) {
1793 if (time
> f
->uf_entered_tm
+ toolong
) {
1796 "Repair completion timeout exceeded on %s; "
1797 "manual fsck may be required", fs_name(f
));
1798 f
->uf_retry
= ufsfx_tune
.uft_long_err_period
;
1802 concerned
= f
->uf_entered_tm
+ (toolong
/ 3);
1803 anxious
= f
->uf_entered_tm
+ ((2 * toolong
) / 3);
1805 if (time
> concerned
)
1806 pester_msg(f
, time
> anxious
? CE_WARN
: CE_NOTE
);
1818 sf_found_umount(ufs_failure_t
*f
)
1821 sfrc_t rc
= SFRC_FAIL
;
1822 struct vfs
*vfsp
= f
->uf_vfsp
;
1823 struct ufsvfs
*ufsvfsp
= f
->uf_ufsvfsp
;
1827 TRIVIA(("[sf_found_umount"));
1829 toolong
= time
> ufsfx_tune
.uft_too_long
+ f
->uf_entered_tm
;
1831 TRIVIA((": unmount time limit exceeded] "));
1835 if (!vfsp
|| !ufsvfsp
) { /* trivial case */
1836 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1840 if (!ULOCKFS_IS_ELOCK(&ufsvfsp
->vfs_ulockfs
)) {
1841 TRIVIA((": !not error locked?"));
1846 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1847 if (vn_vfswlock(vfsp
->vfs_vnodecovered
)) {
1848 TRIVIA((": couldn't lock coveredvp"));
1853 if ((err
= dounmount(vfsp
, 0, kcred
)) != 0) {
1855 /* take note, but not many alternatives here */
1856 mutex_enter(&uf_stats
.ufst_mutex
);
1857 uf_stats
.ufst_unmount_failures
++;
1858 mutex_exit(&uf_stats
.ufst_mutex
);
1860 TRIVIA((": unmount failed] "));
1862 cmn_err(CE_NOTE
, "unmounted error-locked %s", fs_name(f
));
1866 if (toolong
|| (err
!= EBUSY
&& err
!= EAGAIN
))
1867 rc
= set_state(f
, UF_NOTFIX
);
1874 sf_term_cmn(ufs_failure_t
*f
, ufsa_t a
, ufs_failure_states_t s
)
1877 sfrc_t rc
= SFRC_FAIL
;
1879 TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1880 act_name(a
), state_name(s
)));
1881 ASSERT(s
& (UF_FIXED
| UF_NOTFIX
| UF_REPLICA
));
1882 ASSERT(terminal_state(s
));
1884 if (!f
->uf_ufsvfsp
&& !(f
->uf_s
& (UF_UMOUNT
| UF_NOTFIX
))) {
1885 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1897 if (f
->uf_ufsvfsp
&& f
->uf_vfs_lockp
)
1898 need_lock_vfs
= !MUTEX_HELD(f
->uf_vfs_lockp
);
1902 if (need_lock_vfs
&& !mutex_tryenter(f
->uf_vfs_lockp
)) {
1903 TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1908 f
->uf_end_tm
= time
;
1909 f
->uf_lf
.lf_lock
= LOCKFS_OLOCK
;
1912 if (f
->uf_vfs_ufsfxp
)
1913 f
->uf_vfs_ufsfxp
->fx_current
= NULL
;
1916 mutex_exit(f
->uf_vfs_lockp
);
1918 cmn_err(CE_NOTE
, (s
& UF_NOTFIX
)? "Could not fix %s":
1919 "%s is now accessible", fs_name(f
));
1922 mutex_enter(&uf_stats
.ufst_mutex
);
1923 uf_stats
.ufst_num_fixed
++;
1924 mutex_exit(&uf_stats
.ufst_mutex
);
1926 (void) timeout(ufsfx_kill_fix_failure_thread
,
1927 (void *)(ufsfx_tune
.uft_short_err_period
* hz
),
1928 ufsfx_tune
.uft_short_err_period
* hz
);
1934 ASSERT(MUTEX_HELD(f
->uf_vfs_lockp
));
1936 /* not actually a replica? */
1937 if (f
->uf_vfs_ufsfxp
&& f
->uf_vfs_ufsfxp
->fx_current
&&
1938 f
->uf_vfs_ufsfxp
->fx_current
!= f
&&
1940 f
->uf_vfs_ufsfxp
->fx_current
->uf_s
)) {
1942 f
->uf_orig
= f
->uf_vfs_ufsfxp
->fx_current
;
1946 TRIVIA((": NULL fx_current]\n"));
1953 rc
= set_state(f
, UF_PANIC
);
1954 TRIVIA((": failed] "));
1961 * XXX de-allocate these after some period?
1962 * XXX or move to an historical list?
1963 * XXX or have an ioctl which reaps them?
1966 * For now, since we don't expect lots of failures
1967 * to occur (to the point of memory shortages),
1971 /* be sure we're not wasting cpu on old failures */
1972 if (f
->uf_retry
!= 0) {
1973 mutex_enter(&uf_stats
.ufst_mutex
);
1974 uf_stats
.ufst_cpu_waste
++;
1975 mutex_exit(&uf_stats
.ufst_mutex
);
1982 (void) set_state(f
, UF_PANIC
);
1983 TRIVIA((": failed] "));
1995 ufs_failure_states_t s
)
1997 sfrc_t rc
= SFRC_FAIL
;
1999 TRIVIA(("[sf_panic, action is %s, prev. state is %s",
2000 act_name(a
), state_name(f
->uf_s
)));
2001 ASSERT(s
& UF_PANIC
);
2005 f
->uf_retry
= -ufsfx_tune
.uft_short_err_period
;
2013 /* LINTED: warning: logical expression always true: op "||" */
2016 (void) set_state(f
, UF_UMOUNT
); /* XXX UF_NOTFIX? */
2026 * minimum state function
2031 ufsa_t a
, /* LINTED argument unused in function: ignored */
2032 ufs_failure_states_t ignored
)
2034 sfrc_t rc
= SFRC_FAIL
;
2036 TRIVIA(("[sf_minimum, action is %s", act_name(a
)));
2048 (void) set_state(f
, UF_PANIC
);
2049 TRIVIA((": failed] "));
2058 state_trans_valid(ufs_failure_states_t from
, ufs_failure_states_t to
)
2063 HIDEOUS(("[state_trans_valid"));
2068 s
= get_state_desc(to
);
2071 * extra test is necessary since we want UF_UNDEF = 0,
2072 * (to detect freshly allocated memory)
2073 * but can't check for that value with a bit test
2075 valid
= (to
& UF_INIT
)? from
== s
->ud_prev
: from
& s
->ud_prev
;
2077 HIDEOUS((": %svalid] ", valid
? "": "in"));
2082 terminal_state(ufs_failure_states_t state
)
2086 HIDEOUS(("[terminal_state"));
2088 s
= get_state_desc(state
);
2090 HIDEOUS((": %sterminal] ", s
->ud_attr
.terminal
? "": "not "));
2091 return ((int)s
->ud_attr
.terminal
);
2095 alloc_lockfs_comment(ufs_failure_t
*f
, struct lockfs
*lfp
)
2097 MINUTE(("[alloc_lockfs_comment"));
2098 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2101 * ufs_fiolfs expects a kmem_alloc'ed comment;
2102 * it frees the comment if the lock fails
2103 * or else when the lock is unlocked.
2106 f
->uf_lf
.lf_comment
= kmem_zalloc(LOCKFS_MAXCOMMENTLEN
, KM_NOSLEEP
);
2107 if (f
->uf_lf
.lf_comment
) {
2112 * use panic string if there's no previous comment
2113 * or if we're setting the error lock
2115 if ((LOCKFS_IS_ELOCK(&f
->uf_lf
) || !lfp
->lf_comment
||
2116 lfp
->lf_comlen
<= 0)) {
2117 from
= f
->uf_panic_str
;
2118 len
= LOCKFS_MAXCOMMENTLEN
;
2120 from
= lfp
->lf_comment
;
2121 len
= lfp
->lf_comlen
;
2124 bcopy(from
, f
->uf_lf
.lf_comment
, len
);
2125 f
->uf_lf
.lf_comlen
= len
;
2128 f
->uf_lf
.lf_comlen
= 0;
2134 set_lockfs(ufs_failure_t
*f
, struct lockfs
*lfp
)
2136 int (*handle_lockfs_rc
)(ufs_failure_t
*);
2139 MINUTE(("[set_lockfs"));
2140 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2141 ASSERT(!vfs_lock_held(f
->uf_vfsp
));
2142 ASSERT(MUTEX_NOT_HELD(f
->uf_vfs_lockp
));
2144 if (!f
->uf_ufsvfsp
) {
2145 MINUTE((": ufsvfsp is NULL]\n"));
2149 ASSERT(MUTEX_NOT_HELD(&f
->uf_ufsvfsp
->vfs_ulockfs
.ul_lock
));
2151 if (!f
->uf_ufsvfsp
->vfs_root
) {
2152 MINUTE((": vfs_root is NULL]\n"));
2156 alloc_lockfs_comment(f
, lfp
);
2159 if (!LOCKFS_IS_ELOCK(lfp
)) {
2160 lfp
->lf_lock
= f
->uf_lf
.lf_lock
= LOCKFS_ELOCK
;
2161 VN_HOLD(f
->uf_ufsvfsp
->vfs_root
);
2163 ufs__fiolfs(f
->uf_ufsvfsp
->vfs_root
,
2164 &f
->uf_lf
, /* from_user */ 0, /* from_log */ 0);
2165 VN_RELE(f
->uf_ufsvfsp
->vfs_root
);
2168 handle_lockfs_rc
= f
->uf_lf_err
!= 0? lockfs_failure
: lockfs_success
;
2169 rc
= handle_lockfs_rc(f
);
2176 lockfs_failure(ufs_failure_t
*f
)
2179 ufs_failure_states_t s
;
2181 TRIVIA(("[lockfs_failure"));
2182 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2184 if (!f
->uf_ufsvfsp
) {
2185 TRIVIA((": ufsvfsp is NULL]\n"));
2189 error
= f
->uf_lf_err
;
2191 /* non-transient errors: */
2192 case EACCES
: /* disk/in-core metadata reconciliation failed */
2193 case EPERM
: /* inode reconciliation failed; incore inode changed? */
2194 case EIO
: /* device is hard-locked or not responding */
2195 case EROFS
: /* device is write-locked */
2196 case EDEADLK
: /* can't lockfs; deadlock would result; */
2197 /* Swapping or saving accounting records */
2198 /* onto this fs can cause this errno. */
2200 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2201 fs_name(f
), lock_name(&f
->uf_lf
),
2202 err_name(error
), error
));
2205 * if can't get lock, then fallback to panic, unless
2206 * unless unmount was requested (although unmount will
2207 * probably fail if the lock failed, so we'll panic
2211 s
= ((f
->uf_flags
& UFSFX_LCKUMOUNT
) && error
!= EDEADLK
) ?
2212 UF_UMOUNT
: UF_PANIC
;
2214 if (!set_state(f
, s
)) {
2225 f
->uf_retry
= ufsfx_tune
.uft_short_err_period
;
2226 if (curthread
->t_flag
& T_DONTPEND
) {
2227 curthread
->t_flag
&= ~T_DONTPEND
;
2229 } else if (!(f
->uf_s
& (UF_LOCKED
| UF_FIXING
))) {
2230 ufs_failure_states_t state
;
2232 * if we didn't know that the fix had started,
2235 state
= error
== EBUSY
? UF_LOCKED
: UF_FIXING
;
2236 if (!set_state(f
, state
)) {
2237 TRIVIA((": failed] "));
2243 default: /* some other non-fatal error */
2244 MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2245 lock_name(&f
->uf_lf
), fs_name(f
),
2246 err_name(f
->uf_lf_err
), f
->uf_lf_err
));
2248 f
->uf_retry
= ufsfx_tune
.uft_short_err_period
;
2251 case EINVAL
: /* unmounted? */
2252 (void) set_state(f
, UF_NOTFIX
);
2260 lockfs_success(ufs_failure_t
*f
)
2262 TRIVIA(("[lockfs_success"));
2263 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2265 if (!f
->uf_ufsvfsp
) {
2266 TRIVIA((": ufsvfsp is NULL]\n"));
2270 switch (f
->uf_lf
.lf_lock
) {
2271 case LOCKFS_ELOCK
: /* error lock worked */
2273 if (!set_state(f
, UF_LOCKED
)) {
2274 TRIVIA((": failed] "));
2279 case LOCKFS_ULOCK
: /* unlock worked */
2281 * how'd we get here?
2282 * This should be done from fsck's unlock,
2283 * not from this thread's context.
2285 cmn_err(CE_WARN
, "Unlocked error-lock of %s", fs_name(f
));
2286 ufsfx_unlockfs(f
->uf_ufsvfsp
);
2290 if (!set_state(f
, UF_NOTFIX
)) {
2291 TRIVIA((": failed] "));
2301 * when fsck is running it puts its pid into the lockfs
2302 * comment structure, prefaced by PIDSTR
2304 const char *PIDSTR
= "[pid:";
2306 fsck_active(ufs_failure_t
*f
)
2309 int i
, found
, errlocked
;
2311 const int PIDSTRLEN
= (int)strlen(PIDSTR
);
2312 struct ulockfs
*ulp
= &f
->uf_ufsvfsp
->vfs_ulockfs
;
2314 TRIVIA(("[fsck_active"));
2317 ASSERT(f
->uf_s
& UF_FIXING
);
2318 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2319 ASSERT(f
->uf_ufsvfsp
);
2320 ASSERT(MUTEX_NOT_HELD(f
->uf_vfs_lockp
));
2321 ASSERT(MUTEX_NOT_HELD(&ulp
->ul_lock
));
2323 mutex_enter(&ulp
->ul_lock
);
2324 cp
= ulp
->ul_lockfs
.lf_comment
;
2325 comlen
= ulp
->ul_lockfs
.lf_comlen
;
2326 errlocked
= (int)ULOCKFS_IS_ELOCK(ulp
);
2327 mutex_exit(&ulp
->ul_lock
);
2329 if (!cp
|| comlen
== 0) {
2330 TRIVIA((": null comment or comlen <= 0, found:0]"));
2334 for (found
= i
= 0; !found
&& i
< (comlen
- PIDSTRLEN
); i
++, cp
++)
2335 found
= strncmp(cp
, PIDSTR
, PIDSTRLEN
) == 0;
2337 TRIVIA(("found:%d, is_elock:%d]", found
, errlocked
));
2338 return (errlocked
& found
);
2341 static const char unknown_fs
[] = "<unknown fs>";
2342 static const char null_failure
[] = "<NULL ufs failure record; unknown fs>";
2343 static const char mutated_vfs_bufp
[] = "<mutated vfs_bufp, unknown fs>";
2344 static const char mutated_vfs_fs
[] = "<mutated vfs_fs, unknown fs>";
2347 fs_name(ufs_failure_t
*f
)
2349 HIDEOUS(("[fs_name"));
2350 ASSERT(MUTEX_HELD(&f
->uf_mutex
));
2353 HIDEOUS((": failure ptr is NULL]\n"));
2354 return ((char *)null_failure
);
2357 if (f
->uf_fsname
[0] != '\0') {
2358 HIDEOUS((": return (uf_fsname)]\n"));
2359 return (f
->uf_fsname
);
2362 if (MUTEX_HELD(f
->uf_vfs_lockp
)) {
2363 if (f
->uf_bp
!= f
->uf_ufsvfsp
->vfs_bufp
) {
2364 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2365 (void *)f
->uf_bp
, (void *)f
->uf_ufsvfsp
->vfs_bufp
));
2366 return ((char *)mutated_vfs_bufp
);
2368 if (f
->uf_fs
!= f
->uf_ufsvfsp
->vfs_fs
) {
2369 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2370 (void *)f
->uf_fs
, (void *)f
->uf_ufsvfsp
->vfs_fs
));
2371 return ((char *)mutated_vfs_fs
);
2373 if (f
->uf_ufsvfsp
&& f
->uf_bp
&& f
->uf_fs
&&
2374 *f
->uf_fs
->fs_fsmnt
!= '\0') {
2375 HIDEOUS((": return (fs_fsmnt)]\n"));
2376 return (f
->uf_fs
->fs_fsmnt
);
2380 HIDEOUS((": unknown file system]\n"));
2381 return ((char *)unknown_fs
);
2386 lock_name(struct lockfs
*lfp
)
2388 struct lock_description
*l
;
2391 HIDEOUS(("[lock_name"));
2393 lname
= lock_desc
[0].ld_name
;
2394 for (l
= &lock_desc
[1]; l
->ld_name
!= NULL
; l
++) {
2395 if (lfp
&& lfp
->lf_lock
== l
->ld_type
) {
2405 state_name(ufs_failure_states_t state
)
2409 HIDEOUS(("[state_name"));
2411 s
= get_state_desc(state
);
2414 return (s
->ud_name
);
2420 struct error_description
*e
;
2422 HIDEOUS(("[err_name"));
2424 for (e
= &err_desc
[1]; e
->ed_name
!= NULL
; e
++) {
2425 if (error
== e
->ed_errno
) {
2427 return (e
->ed_name
);
2431 return (err_desc
[0].ed_name
);
2435 act_name(ufsa_t action
)
2437 struct action_description
*a
;
2439 HIDEOUS(("[act_name"));
2441 for (a
= &act_desc
[1]; a
->ad_name
!= NULL
; a
++) {
2442 if (action
== a
->ad_v
) {
2444 return (a
->ad_name
);
2448 return (act_desc
[0].ad_name
);
2455 dump_uf_list(char *msg
)
2459 int list_was_locked
= MUTEX_HELD(&ufs_fix
.uq_mutex
);
2461 if (!list_was_locked
&& !mutex_tryenter(&ufs_fix
.uq_mutex
)) {
2462 printf("dump_uf_list: couldn't get list lock\n");
2467 printf("\n%s", msg
);
2469 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2470 ufs_fix
.uq_lowat
, ufs_fix
.uq_ne
);
2472 mutex_enter(&uf_stats
.ufst_mutex
);
2473 printf("\tuf_stats.current_races: %ld\n", uf_stats
.ufst_current_races
);
2474 printf("\tuf_stats.num_failed: %ld\n", uf_stats
.ufst_num_failed
);
2475 printf("\tuf_stats.num_fixed: %ld\n", uf_stats
.ufst_num_fixed
);
2476 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats
.ufst_cpu_waste
);
2477 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2478 uf_stats
.ufst_lock_violations
, uf_stats
.ufst_unmount_failures
);
2479 mutex_exit(&uf_stats
.ufst_mutex
);
2481 for (f
= ufs_fix
.uq_ufhead
, i
= 1; f
; f
= f
->uf_next
, i
++) {
2483 if (!mutex_tryenter(&f
->uf_mutex
)) {
2484 printf("%d.\t\"skipped - try enter failed\"\n", i
);
2490 mutex_exit(&f
->uf_mutex
);
2495 if (!list_was_locked
)
2496 mutex_exit(&ufs_fix
.uq_mutex
);
2500 dump_uf(ufs_failure_t
*f
, int i
)
2503 printf("dump_uf: NULL failure record\n");
2507 printf("%d.\t\"%s\" is %s.\n",
2508 i
, fs_name(f
), state_name(f
->uf_s
));
2509 printf("\t\"%s\"\tAddr: 0x%p\n", f
->uf_panic_str
, (void *)f
);
2510 printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2511 (void *)f
->uf_next
, (void *)f
->uf_prev
);
2514 printf("\tOriginal failure: 0x%p \"%s\"\n",
2515 (void *)f
->uf_orig
, f
->uf_orig
->uf_panic_str
);
2517 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2518 (void *)f
->uf_ufsvfsp
, (void *)f
->uf_vfs_lockp
);
2519 printf("\tVfs_fsfxp: 0x%p\n", (void *)f
->uf_vfs_ufsfxp
);
2520 printf("\tVfs_bufp: 0x%p", (void *)f
->uf_bp
);
2523 printf("\t\tVfs_fs: 0x%p\n", (void *)f
->uf_fs
);
2527 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2528 f
->uf_begin_tm
, f
->uf_entered_tm
, f
->uf_end_tm
);
2530 printf("\tFlags: (%d) %s%s%s%s", f
->uf_flags
,
2531 f
->uf_flags
& UFSFX_LCKONLY
? "\"lock only\" " : "",
2532 f
->uf_flags
& UFSFX_LCKUMOUNT
? "\"lock+unmount\" " : "",
2533 f
->uf_flags
& UFSFX_REPAIR_START
? "\"started repair\" " : "",
2534 f
->uf_flags
== 0? "<none>" : "");
2536 printf("\tRetry: %ld seconds\n", f
->uf_retry
);
2538 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2539 lock_name(&f
->uf_lf
), err_name(f
->uf_lf_err
), f
->uf_lf_err
);
2545 * returns # of ufs_failures in a non-terminal state on queue
2546 * used to coordinate with hlock thread (see ufs_thread.c)
2547 * and to determine when the error lock thread may exit
2551 ufsfx_get_failure_qlen(void)
2557 MINUTE(("[ufsfx_get_failure_qlen"));
2559 if (!mutex_tryenter(&ufs_fix
.uq_mutex
))
2563 * walk down failure list
2566 for (f
= ufs_fix
.uq_ufhead
; f
; f
= f
->uf_next
) {
2568 if (!mutex_tryenter(&f
->uf_mutex
))
2571 s
= get_state_desc(f
->uf_s
);
2573 if (s
->ud_attr
.terminal
) {
2574 mutex_exit(&f
->uf_mutex
);
2578 MINUTE((": found: %s, \"%s: %s\"\n",
2579 fs_name(f
), state_name(f
->uf_s
), f
->uf_panic_str
));
2582 mutex_exit(&f
->uf_mutex
);
2585 mutex_exit(&ufs_fix
.uq_mutex
);
2587 MINUTE((": qlen=%d]\n", qlen
));
2594 * called to shutdown fix failure thread and server daemon
2597 ufsfx_kill_fix_failure_thread(void *arg
)
2599 clock_t odelta
= (clock_t)arg
;
2602 MAJOR(("[ufsfx_kill_fix_failure_thread"));
2604 qlen
= ufsfx_get_failure_qlen();
2609 delta
= odelta
<< 1;
2613 (void) timeout(ufsfx_kill_fix_failure_thread
,
2614 (void *)delta
, delta
);
2615 MAJOR((": rescheduled"));
2617 } else if (qlen
== 0) {
2618 ufs_thread_exit(&ufs_fix
);
2619 MAJOR((": killed"));
2623 * let timeout expire