dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / ufs / ufs_panic.c
blobedc7cf1e1de3b7c2544dfd718cb0cbc9616bbf41
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
30 #include <sys/mode.h>
31 #include <sys/sysmacros.h>
32 #include <sys/cmn_err.h>
33 #include <sys/varargs.h>
34 #include <sys/time.h>
35 #include <sys/buf.h>
36 #include <sys/kmem.h>
37 #include <sys/t_lock.h>
38 #include <sys/poll.h>
39 #include <sys/debug.h>
40 #include <sys/cred.h>
41 #include <sys/lockfs.h>
42 #include <sys/fs/ufs_fs.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_panic.h>
45 #include <sys/fs/ufs_lockfs.h>
46 #include <sys/fs/ufs_trans.h>
47 #include <sys/fs/ufs_mount.h>
48 #include <sys/fs/ufs_prot.h>
49 #include <sys/fs/ufs_bio.h>
50 #include <sys/pathname.h>
51 #include <sys/utsname.h>
52 #include <sys/conf.h>
/*
 * Handy absolute value.  The argument is expanded more than once, so it
 * must not have side effects.
 */
#define	abs(x)	(((x) >= 0) ? (x) : -(x))
#if defined(DEBUG)

/*
 * Debug control word layout (ufs_fix_failure_dbg):
 *   low nibble  - DBGFLG_* behaviour flags
 *   upper bits  - DBGLVL_* verbosity levels
 */
#define	DBGLVL_NONE	0x00000000
#define	DBGLVL_MAJOR	0x00000100
#define	DBGLVL_MINOR	0x00000200
#define	DBGLVL_MINUTE	0x00000400
#define	DBGLVL_TRIVIA	0x00000800
#define	DBGLVL_HIDEOUS	0x00001000

#define	DBGFLG_NONE		0x00000000
#define	DBGFLG_NOPANIC		0x00000001
#define	DBGFLG_LVLONLY		0x00000002
#define	DBGFLG_FIXWOULDPANIC	0x00000004

#define	DBGFLG_FLAGMASK		0x0000000F
/* parenthesized so the mask survives use inside larger expressions */
#define	DBGFLG_LEVELMASK	(~DBGFLG_FLAGMASK)

#define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
#define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)

unsigned int ufs_fix_failure_dbg = DBGLVL_NONE | DBGFLG_NONE;

/*
 * Run "call" when the current debug level selects dbg_level:
 *   - with DBGFLG_LVLONLY set, only when that exact level bit is enabled;
 *   - otherwise, when dbg_level is at or below the configured level.
 *
 * The expansion is deliberately a plain brace block rather than
 * do { } while (0): existing call sites may omit the trailing semicolon.
 */
#define	DCALL(dbg_level, call)						\
	{								\
		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
				if (DEBUG_LEVEL & (dbg_level)) {	\
					call;				\
				}					\
			} else {					\
				if ((dbg_level) <= DEBUG_LEVEL) {	\
					call;				\
				}					\
			}						\
		}							\
	}

/* msg is a fully parenthesized printf argument list: MINOR(("x %d", i)) */
#define	DPRINTF(dbg_level, msg)	DCALL(dbg_level, printf msg)

#define	MAJOR(msg)	DPRINTF(DBGLVL_MAJOR, msg)
#define	MINOR(msg)	DPRINTF(DBGLVL_MINOR, msg)
#define	MINUTE(msg)	DPRINTF(DBGLVL_MINUTE, msg)
#define	TRIVIA(msg)	DPRINTF(DBGLVL_TRIVIA, msg)
#define	HIDEOUS(msg)	DPRINTF(DBGLVL_HIDEOUS, msg)

#else	/* !DEBUG */

/* non-DEBUG builds: every tracing macro expands to nothing */
#define	DCALL(ignored_dbg_level, ignored_routine)
#define	DPRINTF(ignored_dbg_level, ignored_msg)
#define	MAJOR(ignored)
#define	MINOR(ignored)
#define	MINUTE(ignored)
#define	TRIVIA(ignored)
#define	HIDEOUS(ignored)

#endif /* DEBUG */
/* printable stand-in for a NULL or empty string; evaluates str more than once */
#define	NULLSTR(str)	(((str) && *(str) != '\0') ? (str) : "<null>")
#define	NULSTRING	""
/*
 * Somewhat arbitrary limits, all expressed in seconds.  They probably
 * ought to differ from one another, but these values are convenient
 * for debugging.  ufsfx_init() copies them into ufsfx_tune.
 */

/* maximum wait for fsck to start */
const time_t	UF_TOO_LONG = 128;

/* retry periods (seconds) used while ... */
const time_t	UF_FIXSTART_PERIOD = 16;	/* ... awaiting fsck start */
const time_t	UF_FIXPOLL_PERIOD = 256;	/* ... awaiting fsck finish */
const time_t	UF_SHORT_ERROR_PERIOD = 4;	/* ... after (lockfs) error */
const time_t	UF_LONG_ERROR_PERIOD = 512;	/* ... after (lockfs) error */
#define	NO_ERROR	0
/*
 * Pseudo lock type one past the last real lockfs lock type.
 * Parenthesized so the sum is not re-associated by surrounding operators.
 */
#define	LOCKFS_OLOCK	(LOCKFS_MAXLOCK + 1)
129 const ulong_t GB = 1024 * 1024 * 1024;
130 const ulong_t SecondsPerGig = 1024; /* ~17 minutes (overestimate) */
133 * per filesystem flags
135 const int UFSFX_PANIC = (UFSMNT_ONERROR_PANIC >> 4);
136 const int UFSFX_LCKONLY = (UFSMNT_ONERROR_LOCK >> 4);
137 const int UFSFX_LCKUMOUNT = (UFSMNT_ONERROR_UMOUNT >> 4);
138 const int UFSFX_DEFAULT = (UFSMNT_ONERROR_DEFAULT >> 4);
139 const int UFSFX_REPAIR_START = 0x10000000;
/* return protocols */

/*
 * Verdict of triage():
 *   TRIAGE_DEAD       - no repair attempt possible (already panicking,
 *                       file system not identifiable, or would deadlock)
 *   TRIAGE_NO_SPIRIT  - file system is marked to panic on error
 *   TRIAGE_ATTEND_TO  - queue the failure for the fix thread
 */
typedef enum triage_return_code {
	TRIAGE_DEAD = -1,
	TRIAGE_NO_SPIRIT = 0,
	TRIAGE_ATTEND_TO = 1
} triage_t;

/* result of a per-state function / state change */
typedef enum statefunc_return_code {
	SFRC_FAIL = 0,
	SFRC_SUCCESS = 1
} sfrc_t;
154 /* external references */
155 /* in ufs_thread.c */
156 extern int ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
157 extern int ufs_checkaccton(vnode_t *); /* in ufs_lockfs.c */
158 extern int ufs_checkswapon(vnode_t *); /* in ufs_lockfs.c */
160 extern struct pollhead ufs_pollhd; /* in ufs_vnops.c */
162 /* globals */
163 struct ufs_q ufs_fix;
/*
 * patchable constants:
 *	These are set in ufsfx_init() [called at modload]
 *	All values are in seconds.
 */
struct ufs_failure_tunable {
	/* limit repair startup time */
	long	uft_too_long;
	/* pre-repair start period */
	long	uft_fixstart_period;
	/* post-fsck start period */
	long	uft_fixpoll_period;
	/* post-error short period */
	long	uft_short_err_period;
	/* post-error long period */
	long	uft_long_err_period;
} ufsfx_tune;
177 /* internal statistics of events */
178 struct uf_statistics {
179 ulong_t ufst_lock_violations;
180 ulong_t ufst_current_races;
181 ulong_t ufst_unmount_failures;
182 ulong_t ufst_num_fixed;
183 ulong_t ufst_num_failed;
184 ulong_t ufst_cpu_waste;
185 time_t ufst_last_start_tm;
186 kmutex_t ufst_mutex;
187 } uf_stats;
/* action passed to a per-state function */
typedef enum state_action {
	UFA_ERROR = -1,		/* internal error */
	UFA_FOUND = 0,		/* found uf in state */
	UFA_SET = 1		/* change uf to state */
} ufsa_t;
195 /* state definition */
196 typedef struct uf_state_desc {
197 int ud_v; /* value */
198 char *ud_name; /* name */
199 sfrc_t (*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
200 /* per-state actions */
201 ufs_failure_states_t ud_prev; /* valid prev. states */
203 struct uf_state_desc_attr {
204 unsigned terminal:1; /* no action req. if found */
205 unsigned at_fail:1; /* state set by thread */
206 /* encountering the error */
207 unsigned unused;
208 } ud_attr;
209 } ufsd_t;
212 * forward references
215 /* thread to watch for failures */
216 static void ufsfx_thread_fix_failures(void *);
217 static int ufsfx_do_failure_q(void);
218 static void ufsfx_kill_fix_failure_thread(void *);
220 /* routines called when failure occurs */
221 static int ufs_fault_v(vnode_t *, char *, va_list)
222 __KVPRINTFLIKE(2);
223 static ufs_failure_t *init_failure(vnode_t *, char *, va_list)
224 __KVPRINTFLIKE(2);
225 static void queue_failure(ufs_failure_t *);
226 /*PRINTFLIKE2*/
227 static void real_panic(ufs_failure_t *, const char *, ...)
228 __KPRINTFLIKE(2);
229 static void real_panic_v(ufs_failure_t *, const char *, va_list)
230 __KVPRINTFLIKE(2);
231 static triage_t triage(vnode_t *);
233 /* routines called when failure record is acted upon */
234 static sfrc_t set_state(ufs_failure_t *, ufs_failure_states_t);
235 static int state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
236 static int terminal_state(ufs_failure_states_t);
238 /* routines called when states entered/found */
239 static sfrc_t sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
240 static sfrc_t sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
241 static sfrc_t sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
242 static sfrc_t sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t sf_found_queue(ufs_failure_t *);
244 static sfrc_t sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
246 static sfrc_t sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
247 static sfrc_t sf_set_trylck(ufs_failure_t *);
248 static sfrc_t sf_set_locked(ufs_failure_t *);
249 static sfrc_t sf_found_trylck(ufs_failure_t *);
250 static sfrc_t sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
251 static sfrc_t sf_found_umount(ufs_failure_t *);
253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
254 static time_t trylock_time_exceeded(ufs_failure_t *);
255 static void pester_msg(ufs_failure_t *, int);
256 static int get_lockfs_status(ufs_failure_t *, struct lockfs *);
257 static void alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
258 static int set_lockfs(ufs_failure_t *, struct lockfs *);
259 static int lockfs_failure(ufs_failure_t *);
260 static int lockfs_success(ufs_failure_t *);
261 static int fsck_active(ufs_failure_t *);
263 /* low-level support routines */
264 static ufsd_t *get_state_desc(ufs_failure_states_t);
265 static char *fs_name(ufs_failure_t *);
267 #if defined(DEBUG)
268 static char *state_name(ufs_failure_states_t);
269 static char *lock_name(struct lockfs *);
270 static char *err_name(int);
271 static char *act_name(ufsa_t);
272 static void dump_uf_list(char *msg);
273 static void dump_uf(ufs_failure_t *, int i);
274 #endif /* DEBUG */
277 * State Transitions:
279 * normally:
280 * if flagged to be locked but not unmounted: (UFSMNT_ONERROR_LOCK)
281 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
283 * The only difference between these two is that the fsck must be started
284 * manually.
286 * if flagged to be unmounted: (UFSMNT_ONERROR_UMOUNT)
287 * UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
289 * if flagged to panic: (UFSMNT_ONERROR_PANIC)
290 * UNDEF -> INIT -> PANIC
292 * if a secondary panic on a file system which has an active failure
293 * record:
294 * UNDEF -> INIT -> QUEUE -> REPLICA
296 * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
297 * All other states (except possibly PANIC) are set in by the monitor
298 * (lock) thread.
302 ufsd_t state_desc[] =
304 { UF_ILLEGAL, "in an unknown state", sf_minimum, UF_ILLEGAL,
305 { 0, 1, 0 } },
306 { UF_UNDEF, "undefined", sf_undef, UF_UNDEF,
307 { 0, 1, 0 } },
308 { UF_INIT, "being initialized", sf_init, UF_UNDEF,
309 { 0, 1, 0 } },
310 { UF_QUEUE, "queued", sf_queue, UF_INIT,
311 { 0, 1, 0 } },
312 { UF_TRYLCK, "trying to be locked", sf_nonterm_cmn,
313 UF_QUEUE, { 0, 0, 0 } },
314 { UF_LOCKED, "locked", sf_nonterm_cmn,
315 UF_TRYLCK | UF_FIXING, { 0, 0, 0 } },
316 { UF_UMOUNT, "being unmounted", sf_nonterm_cmn,
318 #if defined(DEBUG)
319 UF_PANIC |
320 #endif /* DEBUG */
321 UF_TRYLCK | UF_LOCKED, { 0, 0, 0 } },
322 { UF_FIXING, "being fixed", sf_nonterm_cmn,
323 UF_LOCKED, { 0, 0, 0 } },
324 { UF_FIXED, "fixed", sf_term_cmn,
325 UF_FIXING, { 1, 0, 0 } },
326 { UF_NOTFIX, "not fixed", sf_term_cmn,
328 #if defined(DEBUG)
329 UF_PANIC |
330 #endif /* DEBUG */
332 UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
333 { 1, 0, 0 } },
334 { UF_REPLICA, "a replica", sf_term_cmn,
335 UF_QUEUE, { 1, 0, 0 } },
336 { UF_PANIC, "panicking", sf_panic,
337 /* XXX make this narrower */ UF_ALLSTATES, { 0, 0, 0 } },
338 { UF_UNDEF, NULL, ((sfrc_t (*)()) NULL),
339 UF_UNDEF, { 0, 0, 0 } }
342 /* unified collection */
343 struct ufsfx_info {
344 struct uf_statistics *ufi_statp;
345 struct ufs_failure_tunable *ufi_tunep;
346 ufsd_t *ufi_statetab;
347 } uffsinfo;
#if defined(DEBUG)
/*
 * Value-to-name tables for debug output.  In each table the first row
 * is the "unexpected value" entry and the last row, with a NULL name,
 * marks the end.
 */
struct action_description {
	ufsa_t	ad_v;
	char	*ad_name;
};

struct error_description {
	int	ed_errno;
	char	*ed_name;
};

struct lock_description {
	int	ld_type;
	char	*ld_name;
};

#define	EUNK		(-1)

struct error_description err_desc[] =
{
	{ EUNK,		"<unexpected errno?>"	},
	{ EINVAL,	"EINVAL"		},
	{ EACCES,	"EACCES"		},
	{ EPERM,	"EPERM"			},
	{ EIO,		"EIO"			},
	{ EDEADLK,	"EDEADLK"		},
	{ EBUSY,	"EBUSY"			},
	{ EAGAIN,	"EAGAIN"		},
	{ ERESTART,	"ERESTART"		},
	{ ETIMEDOUT,	"ETIMEDOUT"		},
	{ NO_ERROR,	"Ok"			},
	{ EUNK,		NULL			}
};

struct action_description act_desc[] =
{
	{ UFA_ERROR,	"<unexpected action?>"	},
	{ UFA_FOUND,	"\"found\""		},
	{ UFA_SET,	"\"set\""		},
	{ UFA_ERROR,	NULL			},
};

#define	LOCKFS_BADLOCK	(-1)

struct lock_description lock_desc[] =
{
	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
	{ LOCKFS_ULOCK,		"Unlock"		},
	{ LOCKFS_ELOCK,		"Error Lock"		},
	{ LOCKFS_HLOCK,		"Hard Lock"		},
	{ LOCKFS_OLOCK,		"Old Lock"		},
	{ LOCKFS_BADLOCK,	NULL			}
};

#endif /* DEBUG */
402 * ufs_fault, ufs_fault_v
404 * called instead of cmn_err(CE_PANIC, ...) by ufs routines
405 * when a failure is detected to put the file system into an
406 * error state (if possible) or to devolve to a panic otherwise
408 * vnode is some vnode in this file system, used to find the way
409 * to ufsvfs, vfsp etc. Since a panic can be called from many
410 * levels, the vnode is the most convenient hook to pass through.
414 /*PRINTFLIKE2*/
416 ufs_fault(vnode_t *vp, char *fmt, ...)
418 va_list adx;
419 int error;
421 MINOR(("[ufs_fault"));
423 va_start(adx, fmt);
424 error = ufs_fault_v(vp, fmt, adx);
425 va_end(adx);
427 MINOR((": %s (%d)]\n", err_name(error), error));
428 return (error);
431 const char *nullfmt = "<null format?>";
433 static int
434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
436 ufs_failure_t *new = NULL;
437 ufsvfs_t *ufsvfsp;
438 triage_t fix;
439 int err = ERESTART;
440 int need_vfslock;
442 MINOR(("[ufs_fault_v"));
444 if (fmt == NULL)
445 fmt = (char *)nullfmt;
447 fix = triage(vp);
449 if (vp) {
450 ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
453 * Something bad has happened. That is why we are here.
455 * In order for the bad thing to be recorded in the superblock
456 * we need to write to the superblock directly.
457 * In the case that logging is enabled the logging code
458 * would normally intercept our write as a delta to the log,
459 * thus we mark the filesystem FSBAD in any case.
461 need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
463 if (need_vfslock) {
464 mutex_enter(&ufsvfsp->vfs_lock);
467 ufsvfsp->vfs_fs->fs_clean = FSBAD;
468 ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
469 ufsvfsp->vfs_bufp->b_flags &=
470 ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI);
472 (void) bdev_strategy(ufsvfsp->vfs_bufp);
473 (void) biowait(ufsvfsp->vfs_bufp);
475 if (need_vfslock) {
476 mutex_exit(&ufsvfsp->vfs_lock);
480 switch (fix) {
482 default:
483 case TRIAGE_DEAD:
484 case TRIAGE_NO_SPIRIT:
486 real_panic_v(new, fmt, adx);
487 /* LINTED: warning: logical expression always true: op "||" */
488 ASSERT(DEBUG);
489 err = EAGAIN;
491 #if defined(DEBUG)
492 if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
493 break;
495 /* FALLTHROUGH */
497 #else
498 break;
500 #endif /* DEBUG */
502 case TRIAGE_ATTEND_TO:
504 /* q thread not running yet? */
505 if (mutex_tryenter(&ufs_fix.uq_mutex)) {
506 if (!ufs_fix.uq_threadp) {
507 mutex_exit(&ufs_fix.uq_mutex);
508 ufs_thread_start(&ufs_fix,
509 ufsfx_thread_fix_failures, NULL);
510 ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
511 mutex_enter(&ufs_fix.uq_mutex);
512 } else {
514 * We got the lock but we are not the current
515 * threadp so we have to release the lock.
517 mutex_exit(&ufs_fix.uq_mutex);
519 } else {
520 MINOR((": fix failure thread already running "));
522 * No need to log another failure as one is already
523 * being logged.
525 break;
528 if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
529 mutex_exit(&ufs_fix.uq_mutex);
530 cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
531 } else {
533 * Must check if we actually still own the lock and
534 * if so then release the lock and move on with life.
536 if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
537 mutex_exit(&ufs_fix.uq_mutex);
540 new = init_failure(vp, fmt, adx);
541 if (new != NULL) {
542 queue_failure(new);
543 break;
545 real_panic_v(new, fmt, adx);
546 break;
549 MINOR(("] "));
550 return (err);
554 * triage()
556 * Attempt to fix iff:
557 * - the system is not already panicking
558 * - this file system isn't explicitly marked not to be fixed
559 * - we can connect to the user-level daemon
560 * These conditions are detectable later, but if we can determine
561 * them in the failing threads context the core dump may be more
562 * useful.
566 static triage_t
567 triage(vnode_t *vp)
569 struct inode *ip;
570 int need_unlock_vfs;
571 int fs_flags;
573 MINUTE(("[triage"));
575 if (panicstr) {
576 MINUTE((
577 ": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
578 return (TRIAGE_DEAD);
581 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
582 MINUTE((
583 ": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
584 return (TRIAGE_DEAD);
587 /* use tryenter and continue no matter what since we're panicky */
588 need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
589 if (need_unlock_vfs)
590 need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
592 fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
593 if (need_unlock_vfs)
594 mutex_exit(&ip->i_ufsvfs->vfs_lock);
596 if (fs_flags & UFSFX_PANIC) {
597 MINUTE((
598 ": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
599 return (TRIAGE_NO_SPIRIT);
602 if (ufs_checkaccton(vp) != 0) {
603 MINUTE((
604 ": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
605 return (TRIAGE_DEAD);
608 if (ufs_checkswapon(vp) != 0) {
609 MINUTE((
610 ": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
611 return (TRIAGE_DEAD);
614 MINUTE((": return TRIAGE_ATTEND_TO] "));
615 return (TRIAGE_ATTEND_TO);
619 * init failure
621 * This routine allocates a failure struct and initializes
622 * it's member elements.
623 * Space is allocated for copies of dynamic identifying fs structures
624 * passed in. Without a much more segmented kernel architecture
625 * this is as protected as we can make it (for now.)
627 static ufs_failure_t *
628 init_failure(vnode_t *vp, char *fmt, va_list adx)
630 ufs_failure_t *new;
631 struct inode *ip;
632 int initialization_worked = 0;
633 int need_vfs_unlock;
635 MINOR(("[init_failure"));
637 new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
638 if (!new) {
639 MINOR((": kmem_zalloc failed]\n"));
640 return (NULL);
644 * enough information to make a fix attempt possible?
646 if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
647 !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
648 goto errout;
650 if (vp->v_type != VREG && vp->v_type != VDIR &&
651 vp->v_type != VBLK && vp->v_type != VCHR &&
652 vp->v_type != VLNK && vp->v_type != VFIFO &&
653 vp->v_type != VSOCK)
654 goto errout;
656 if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
657 ip->i_ufsvfs->vfs_root->v_type != VDIR &&
658 ip->i_ufsvfs->vfs_root->v_type != VBLK &&
659 ip->i_ufsvfs->vfs_root->v_type != VCHR &&
660 ip->i_ufsvfs->vfs_root->v_type != VLNK &&
661 ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
662 ip->i_ufsvfs->vfs_root->v_type != VSOCK)
663 goto errout;
665 if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
666 (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
667 goto errout;
669 /* intialize values */
671 (void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
673 new->uf_ufsvfsp = ip->i_ufsvfs;
674 new->uf_vfsp = ip->i_vfs;
676 mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
677 need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
679 if (need_vfs_unlock) {
680 if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
682 * not much alternative here, but we're panicking
683 * already, it couldn't be worse - so just
684 * proceed optimistically and take note.
686 mutex_enter(&uf_stats.ufst_mutex);
687 uf_stats.ufst_lock_violations++;
688 mutex_exit(&uf_stats.ufst_mutex);
689 MINOR((": couldn't get vfs lock"))
690 need_vfs_unlock = 0;
694 if (mutex_tryenter(&new->uf_mutex)) {
695 initialization_worked = set_state(new, UF_INIT);
696 mutex_exit(&new->uf_mutex);
699 if (need_vfs_unlock)
700 mutex_exit(&ip->i_ufsvfs->vfs_lock);
702 if (initialization_worked) {
703 MINOR(("] "));
704 return (new);
706 /* FALLTHROUGH */
708 errout:
709 if (new)
710 kmem_free(new, sizeof (ufs_failure_t));
711 MINOR((": failed]\n"));
712 return (NULL);
715 static void
716 queue_failure(ufs_failure_t *new)
718 MINOR(("[queue_failure"));
720 mutex_enter(&ufs_fix.uq_mutex);
722 if (ufs_fix.uq_ufhead)
723 insque(new, &ufs_fix.uq_ufhead);
724 else
725 ufs_fix.uq_ufhead = new;
727 if (mutex_tryenter(&new->uf_mutex)) {
728 (void) set_state(new, UF_QUEUE);
729 mutex_exit(&new->uf_mutex);
732 mutex_enter(&uf_stats.ufst_mutex); /* force wakeup */
733 ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
734 mutex_exit(&uf_stats.ufst_mutex);
736 cv_broadcast(&ufs_fix.uq_cv);
738 DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ?
739 new->uf_panic_str : "queue_failure: NULL panic str?"));
740 mutex_exit(&ufs_fix.uq_mutex);
742 MINOR(("] "));
745 /*PRINTFLIKE2*/
746 static void
747 real_panic(ufs_failure_t *f, const char *fmt, ...)
749 va_list adx;
751 MINUTE(("[real_panic "));
753 va_start(adx, fmt);
754 real_panic_v(f, fmt, adx);
755 va_end(adx);
757 MINUTE((": return?!]\n"));
760 static void
761 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
763 int seriousness = CE_PANIC;
764 int need_unlock;
766 MINUTE(("[real_panic_v "));
768 if (f && f->uf_ufsvfsp)
769 TRANS_SETERROR(f->uf_ufsvfsp);
771 #if defined(DEBUG)
772 if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
773 seriousness = CE_WARN;
774 cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
776 #endif /* DEBUG */
778 ddi_msleep(500); /* allow previous warnings to get out */
780 if (!f && fmt)
781 vcmn_err(seriousness, fmt, adx);
782 else
783 cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
784 "real_panic: <unknown panic?>");
786 if (f) {
787 need_unlock = !MUTEX_HELD(&f->uf_mutex);
788 if (need_unlock) {
789 mutex_enter(&f->uf_mutex);
792 f->uf_retry = -1;
793 (void) set_state(f, UF_PANIC);
795 if (need_unlock) {
796 mutex_exit(&f->uf_mutex);
799 MINUTE((": return?!]\n"));
803 * initializes ufs panic structs, locks, etc
805 void
806 ufsfx_init(void)
809 MINUTE(("[ufsfx_init"));
811 /* patchable; unchanged while running, so no lock is needed */
812 ufsfx_tune.uft_too_long = UF_TOO_LONG;
813 ufsfx_tune.uft_fixstart_period = UF_FIXSTART_PERIOD;
814 ufsfx_tune.uft_fixpoll_period = UF_FIXPOLL_PERIOD;
815 ufsfx_tune.uft_short_err_period = UF_SHORT_ERROR_PERIOD;
816 ufsfx_tune.uft_long_err_period = UF_LONG_ERROR_PERIOD;
818 uffsinfo.ufi_statp = &uf_stats;
819 uffsinfo.ufi_tunep = &ufsfx_tune;
820 uffsinfo.ufi_statetab = &state_desc[0];
822 mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
823 ufs_thread_init(&ufs_fix, /* maxne */ 1);
825 MINUTE(("] "));
829 * initializes per-ufs values
830 * returns 0 (ok) or errno
833 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
835 MINUTE(("[ufsfx_mount (%d)", flags));
836 /* don't check/need vfs_lock because it's still being initialized */
838 ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
840 MINUTE((": %s: fx_flags:%ld,",
841 ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
843 * onerror={panic ^ lock only ^ unmount}
846 if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
847 MINUTE((" PANIC"));
849 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
850 MINUTE((" LCKONLY"));
852 } else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
853 MINUTE((" LCKUMOUNT"));
855 } else {
856 ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
857 ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
858 (UFSMNT_ONERROR_FLGMASK >> 4));
859 MINUTE((" DEFAULT"));
862 pollwakeup(&ufs_pollhd, POLLPRI);
863 MINUTE(("]\n"));
864 return (0);
868 * ufsfx_unmount
870 * called during unmount
872 void
873 ufsfx_unmount(struct ufsvfs *ufsvfsp)
875 ufs_failure_t *f;
876 int must_unlock_list;
878 MINUTE(("[ufsfx_unmount"));
880 if (!ufsvfsp) {
881 MINUTE((": no ufsvfsp]"));
882 return;
885 if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
886 mutex_enter(&ufs_fix.uq_mutex);
888 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
889 int must_unlock_failure;
891 must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
892 if (must_unlock_failure) {
893 mutex_enter(&f->uf_mutex);
896 if (f->uf_ufsvfsp == ufsvfsp) {
899 * if we owned the failure record lock, then this
900 * is probably a fix failure-triggered unmount, so
901 * the warning is not appropriate or needed
904 /* XXX if rebooting don't print this? */
905 if (!terminal_state(f->uf_s) && must_unlock_failure) {
906 cmn_err(CE_WARN,
907 "Unmounting %s while error-locked",
908 fs_name(f));
911 f->uf_ufsvfsp = NULL;
912 f->uf_vfs_ufsfxp = NULL;
913 f->uf_vfs_lockp = NULL;
914 f->uf_bp = NULL;
915 f->uf_vfsp = NULL;
916 f->uf_retry = -1;
919 if (must_unlock_failure)
920 mutex_exit(&f->uf_mutex);
922 if (must_unlock_list)
923 mutex_exit(&ufs_fix.uq_mutex);
925 pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
926 MINUTE(("] "));
930 * ufsfx_(un)lockfs
932 * provides hook from lockfs code so we can recognize unlock/relock
933 * This is called after it is certain that the (un)lock will succeed.
935 void
936 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
938 ufs_failure_t *f;
939 int need_unlock;
940 int need_unlock_list;
941 int informed = 0;
943 MINUTE(("[ufsfx_unlockfs"));
945 if (!ufsvfsp)
946 return;
948 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
950 if (need_unlock_list)
951 mutex_enter(&ufs_fix.uq_mutex);
953 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
955 need_unlock = !MUTEX_HELD(&f->uf_mutex);
956 if (need_unlock)
957 mutex_enter(&f->uf_mutex);
959 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
960 if (!(f->uf_s & UF_FIXING)) {
962 * This might happen if we don't notice that
963 * the fs gets marked FSFIX before it is
964 * marked FSCLEAN, as might occur if the
965 * the superblock was hammered directly.
967 if (!informed) {
968 informed = 1;
969 cmn_err(CE_NOTE,
970 "Unlock of %s succeeded before "
971 "fs_clean marked FSFIX?",
972 fs_name(f));
976 * pass through fixing state so
977 * transition protocol is satisfied
979 if (!set_state(f, UF_FIXING)) {
980 MINUTE((": failed] "));
984 if (!set_state(f, UF_FIXED)) {
985 /* it's already fixed, so don't panic now */
986 MINUTE((": failed] "));
990 if (need_unlock)
991 mutex_exit(&f->uf_mutex);
993 if (need_unlock_list)
994 mutex_exit(&ufs_fix.uq_mutex);
995 MINUTE(("] "));
998 void
999 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
1001 ufs_failure_t *f;
1002 int need_unlock;
1003 int need_unlock_list;
1005 MINUTE(("[ufsfx_lockfs"));
1007 if (!ufsvfsp)
1008 return;
1010 need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
1012 if (need_unlock_list)
1013 mutex_enter(&ufs_fix.uq_mutex);
1015 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1017 need_unlock = !MUTEX_HELD(&f->uf_mutex);
1018 if (need_unlock)
1019 mutex_enter(&f->uf_mutex);
1021 if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1022 f->uf_s != UF_PANIC) {
1023 switch (f->uf_s) {
1025 default:
1026 cmn_err(CE_WARN,
1027 "fs %s not in state "
1028 "UF_TRYLCK, UF_LOCKED or UF_FIXING",
1029 fs_name(f));
1030 break;
1032 case UF_TRYLCK:
1033 if (!set_state(f, UF_LOCKED)) {
1034 MINUTE((": failed] "));
1036 break;
1038 case UF_LOCKED:
1039 if (!set_state(f, UF_FIXING)) {
1040 MINUTE((": failed] "));
1042 break;
1044 case UF_FIXING:
1045 break;
1050 if (need_unlock)
1051 mutex_exit(&f->uf_mutex);
1053 if (need_unlock_list)
1054 mutex_exit(&ufs_fix.uq_mutex);
1056 MINUTE(("] "));
1060 * error lock, trigger fsck and unlock those fs with failures
1061 * blatantly copied from the hlock routine, although this routine
1062 * triggers differently in order to use uq_ne as meaningful data.
1064 /* ARGSUSED */
1065 void
1066 ufsfx_thread_fix_failures(void *ignored)
1068 int retry;
1069 callb_cpr_t cprinfo;
1071 CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1072 "ufsfixfail");
1074 MINUTE(("[ufsfx_thread_fix_failures] "));
1076 for (;;) {
1077 /* sleep until there is work to do */
1079 mutex_enter(&ufs_fix.uq_mutex);
1080 (void) ufs_thread_run(&ufs_fix, &cprinfo);
1081 ufs_fix.uq_ne = 0;
1082 mutex_exit(&ufs_fix.uq_mutex);
1084 /* process failures on our q */
1085 do {
1086 retry = ufsfx_do_failure_q();
1087 if (retry) {
1088 mutex_enter(&ufs_fix.uq_mutex);
1089 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1090 (void) cv_reltimedwait(&ufs_fix.uq_cv,
1091 &ufs_fix.uq_mutex, (hz * retry),
1092 TR_CLOCK_TICK);
1093 CALLB_CPR_SAFE_END(&cprinfo,
1094 &ufs_fix.uq_mutex);
1095 mutex_exit(&ufs_fix.uq_mutex);
1097 } while (retry);
1099 /* NOTREACHED */
1104 * watch for fix-on-panic work
1106 * returns # of seconds to sleep before trying again
1107 * and zero if no retry is needed
1111 ufsfx_do_failure_q(void)
1113 ufs_failure_t *f;
1114 long retry = 1;
1115 ufsd_t *s;
1117 MAJOR(("[ufsfx_do_failure_q"));
1118 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1120 if (!mutex_tryenter(&ufs_fix.uq_mutex))
1121 return (retry);
1123 retry = 0;
1124 rescan_q:
1127 * walk down failure list
1128 * depending on state of each failure, do whatever
1129 * is appropriate to move it to the next state
1130 * taking note of whether retry gets set
1132 * retry protocol:
1133 * wakeup in shortest required time for any failure
1134 * retry == 0; nothing more to do (terminal state)
1135 * retry < 0; reprocess queue immediately, retry will
1136 * be abs(retry) for the next cycle
1137 * retry > 0; schedule wakeup for retry seconds
1140 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1142 if (!mutex_tryenter(&f->uf_mutex)) {
1143 retry = 1;
1144 continue;
1146 s = get_state_desc(f->uf_s);
1148 MINOR((": found%s: %s, \"%s: %s\"\n",
1149 s->ud_attr.terminal ? " old" : "",
1150 fs_name(f), state_name(f->uf_s), f->uf_panic_str));
1152 if (s->ud_attr.terminal) {
1153 mutex_exit(&f->uf_mutex);
1154 continue;
1157 if (s->ud_sfp)
1158 (*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
1160 ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
1162 if (f->uf_retry != 0) {
1163 if (retry > f->uf_retry || retry == 0)
1164 retry = f->uf_retry;
1165 if (f->uf_retry < 0)
1166 f->uf_retry = abs(f->uf_retry);
1168 mutex_exit(&f->uf_mutex);
1172 if (retry < 0) {
1173 retry = abs(retry);
1174 goto rescan_q;
1177 mutex_exit(&ufs_fix.uq_mutex);
1179 DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1180 MAJOR((": retry=%ld, good night]\n\n", retry));
1182 return (retry);
1185 static void
1186 pester_msg(ufs_failure_t *f, int seriousness)
1188 MINUTE(("[pester_msg"));
1189 ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1192 * XXX if seems too long for this fs, poke administrator
1193 * XXX to run fsck manually (and change retry time?)
1195 cmn_err(seriousness, "Waiting for repair of %s to %s",
1196 fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish");
1197 MINUTE(("]"));
1200 static time_t
1201 trylock_time_exceeded(ufs_failure_t *f)
1203 time_t toolong;
1204 extern time_t time;
1206 MINUTE(("[trylock_time_exceeded"));
1207 ASSERT(MUTEX_HELD(&f->uf_mutex));
1209 toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1210 if (time > toolong)
1211 cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1213 MINUTE(("] "));
1214 return (time <= toolong? 0: time - toolong);
1217 static int
1218 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1220 MINUTE(("[get_lockfs_status"));
1222 if (!f->uf_ufsvfsp) {
1223 MINUTE((": ufsvfsp is NULL]\n"));
1224 return (0);
1227 ASSERT(MUTEX_HELD(&f->uf_mutex));
1228 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1229 ASSERT(!vfs_lock_held(f->uf_vfsp));
1230 ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1232 f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1234 if (f->uf_lf_err) {
1235 f->uf_retry = ufsfx_tune.uft_short_err_period;
1238 MINUTE(("] "));
1239 return (1);
1242 static sfrc_t
1243 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1245 ufsd_t *s;
1246 sfrc_t sfrc = SFRC_FAIL;
1247 int need_unlock;
1248 extern time_t time;
1250 HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1251 ASSERT(f);
1252 ASSERT(MUTEX_HELD(&f->uf_mutex));
1255 * if someone else is panicking, just let panic sync proceed
1257 if (panicstr) {
1258 (void) set_state(f, UF_NOTFIX);
1259 HIDEOUS((": state reset: not fixed] "));
1260 return (sfrc);
1264 * bad state transition, an internal error
1266 if (!state_trans_valid(f->uf_s, new_state)) {
1267 /* recursion */
1268 if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1269 (void) set_state(f, UF_PANIC);
1270 MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1271 state_name(f->uf_s), state_name(new_state)));
1272 return (sfrc);
1275 s = get_state_desc(new_state);
1277 need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1278 if (need_unlock)
1279 mutex_enter(&ufs_fix.uq_mutex);
1281 if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1282 curthread == ufs_fix.uq_threadp) {
1283 cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1284 fs_name(f));
1286 if (need_unlock)
1287 mutex_exit(&ufs_fix.uq_mutex);
1289 /* NULL state functions always succeed */
1290 sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1292 if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1293 f->uf_s = new_state;
1294 f->uf_entered_tm = time;
1295 f->uf_counter = 0;
1298 HIDEOUS(("]\n"));
1299 return (sfrc);
1302 static ufsd_t *
1303 get_state_desc(ufs_failure_states_t state)
1305 ufsd_t *s;
1307 HIDEOUS(("[get_state_desc"));
1309 for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1310 if (s->ud_v == state) {
1311 HIDEOUS(("] "));
1312 return (s);
1316 HIDEOUS(("] "));
1317 return (&state_desc[0]); /* default */
1320 static sfrc_t
1321 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1323 sfrc_t rc;
1325 TRIVIA(("[sf_undef, action is %s, state is %s\n",
1326 act_name(a), state_name(s)));
1327 ASSERT(s == UF_UNDEF);
1329 /* shouldn't find null failure records or ever set one */
1330 rc = set_state(f, UF_NOTFIX);
1332 TRIVIA(("] "));
1333 return (rc);
1337 static sfrc_t
1338 sf_init(
1339 ufs_failure_t *f,
1340 ufsa_t a,
1341 ufs_failure_states_t s)
1343 sfrc_t rc = SFRC_FAIL;
1344 extern time_t time;
1346 TRIVIA(("[sf_init, action is %s", act_name(a)));
1347 ASSERT(s & UF_INIT);
1349 switch (a) {
1350 case UFA_SET:
1351 f->uf_begin_tm = time;
1352 f->uf_retry = 1;
1353 if (!f->uf_ufsvfsp) {
1354 (void) set_state(f, UF_PANIC);
1355 TRIVIA((": NULL ufsvfsp]\n"));
1356 return (rc);
1359 * because we can call panic from many different levels,
1360 * we can't be sure that we've got the vfs_lock at this
1361 * point. However, there's not much alternative and if
1362 * we don't (have the lock) the worst case is we'll just
1363 * panic again
1365 f->uf_vfs_lockp = &f->uf_ufsvfsp->vfs_lock;
1366 f->uf_vfs_ufsfxp = &f->uf_ufsvfsp->vfs_fsfx;
1368 if (!f->uf_ufsvfsp->vfs_bufp) {
1369 (void) set_state(f, UF_PANIC);
1370 TRIVIA((": NULL vfs_bufp]\n"));
1371 return (rc);
1373 f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1375 if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1376 (void) set_state(f, UF_PANIC);
1377 TRIVIA((": NULL vfs_fs]\n"));
1378 return (rc);
1381 /* vfs_fs = vfs_bufp->b_un.b_fs */
1382 bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1384 f->uf_lf.lf_lock = LOCKFS_ELOCK; /* primer */
1386 if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1387 (void) set_state(f, UF_PANIC);
1388 TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1389 return (rc);
1391 f->uf_dev = f->uf_vfsp->vfs_dev;
1393 rc = SFRC_SUCCESS;
1394 break;
1396 case UFA_FOUND:
1397 default:
1398 /* failures marked init shouldn't even be on the queue yet */
1399 rc = set_state(f, UF_QUEUE);
1400 TRIVIA((": found failure with state init]\n"));
1403 TRIVIA(("] "));
1404 return (rc);
1407 static sfrc_t
1408 sf_queue(
1409 ufs_failure_t *f,
1410 ufsa_t a,
1411 ufs_failure_states_t s)
1413 sfrc_t rc = SFRC_FAIL;
1415 TRIVIA(("[sf_queue, action is %s", act_name(a)));
1416 ASSERT(s & UF_QUEUE);
1418 if (!f->uf_ufsvfsp) {
1419 TRIVIA((": NULL ufsvfsp]\n"));
1420 return (rc);
1423 switch (a) {
1424 case UFA_FOUND:
1425 rc = sf_found_queue(f);
1426 break;
1428 case UFA_SET:
1430 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1432 mutex_enter(&uf_stats.ufst_mutex);
1433 uf_stats.ufst_num_failed++;
1434 mutex_exit(&uf_stats.ufst_mutex);
1437 * if can't get the vfs lock, just wait until
1438 * UF_TRYLCK to set fx_current
1440 if (mutex_tryenter(f->uf_vfs_lockp)) {
1441 f->uf_vfs_ufsfxp->fx_current = f;
1442 mutex_exit(f->uf_vfs_lockp);
1443 } else {
1444 mutex_enter(&uf_stats.ufst_mutex);
1445 uf_stats.ufst_current_races++;
1446 mutex_exit(&uf_stats.ufst_mutex);
1449 f->uf_retry = 1;
1450 rc = SFRC_SUCCESS;
1451 TRIVIA(("] "));
1452 break;
1454 default:
1455 (void) set_state(f, UF_PANIC);
1456 TRIVIA((": failed] "));
1459 return (rc);
1462 static sfrc_t
1463 sf_found_queue(ufs_failure_t *f)
1465 int replica;
1466 sfrc_t rc = SFRC_FAIL;
1468 TRIVIA(("[sf_found_queue"));
1471 * don't need to check for null ufsvfsp because
1472 * unmount must own list's ufs_fix.uq_mutex
1473 * to mark it null and we own that lock since
1474 * we got here.
1477 ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1478 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1480 if (!mutex_tryenter(f->uf_vfs_lockp)) {
1481 TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1482 f->uf_retry = 1;
1483 return (rc);
1486 replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1487 f->uf_vfs_ufsfxp->fx_current != f &&
1488 !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1491 * copy general flags to this ufs_failure so we don't
1492 * need to refer back to the ufsvfs, or, more importantly,
1493 * don't need to keep acquiring (trying to acquire) vfs_lockp
1495 * The most restrictive option wins:
1496 * panic > errlock only > errlock+unmount > repair
1497 * XXX panic > elock > elock > elock+umount
1499 if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1500 if (!set_state(f, UF_PANIC)) {
1501 TRIVIA((": marked panic but was queued?"));
1502 real_panic(f, " ");
1503 /*NOTREACHED*/
1505 mutex_exit(f->uf_vfs_lockp);
1506 return (rc);
1508 f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1510 if (replica) {
1511 if (!set_state(f, UF_REPLICA)) {
1512 f->uf_retry = 1;
1513 TRIVIA((": set to replica failed] "));
1514 } else {
1515 TRIVIA(("] "));
1517 mutex_exit(f->uf_vfs_lockp);
1518 return (rc);
1520 mutex_exit(f->uf_vfs_lockp);
1522 if (!set_state(f, UF_TRYLCK)) {
1523 TRIVIA((": failed] "));
1524 } else {
1525 rc = SFRC_SUCCESS;
1527 return (rc);
1530 static sfrc_t
1531 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1533 sfrc_t rc = SFRC_FAIL;
1535 TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1536 ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1537 ASSERT(!terminal_state(s));
1539 if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1540 TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1541 (void) set_state(f, UF_NOTFIX);
1542 return (rc);
1545 switch (a) {
1546 case UFA_SET:
1547 switch (s) {
1548 case UF_TRYLCK:
1549 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1550 rc = sf_set_trylck(f);
1551 break;
1553 case UF_LOCKED:
1554 rc = sf_set_locked(f);
1555 break;
1557 case UF_FIXING:
1558 f->uf_flags |= UFSFX_REPAIR_START;
1559 f->uf_retry = ufsfx_tune.uft_fixpoll_period;
1560 rc = SFRC_SUCCESS;
1561 break;
1563 case UF_UMOUNT:
1564 f->uf_retry = -ufsfx_tune.uft_short_err_period;
1565 rc = SFRC_SUCCESS;
1566 break;
1568 default:
1569 (void) set_state(f, UF_PANIC);
1570 TRIVIA((": failed] "));
1572 break;
1574 case UFA_FOUND:
1576 switch (s) {
1577 case UF_TRYLCK:
1578 rc = sf_found_trylck(f);
1579 break;
1581 case UF_LOCKED:
1582 case UF_FIXING:
1583 rc = sf_found_lock_fix_cmn(f, s);
1584 break;
1586 case UF_UMOUNT:
1587 rc = sf_found_umount(f);
1588 break;
1590 default:
1591 (void) set_state(f, UF_PANIC);
1592 TRIVIA((": failed] "));
1593 break;
1595 break;
1596 default:
1597 (void) set_state(f, UF_PANIC);
1598 TRIVIA((": failed] "));
1599 break;
1602 TRIVIA(("] "));
1603 return (rc);
1606 static sfrc_t
1607 sf_set_trylck(ufs_failure_t *f)
1609 TRIVIA(("[sf_set_trylck"));
1611 if (!mutex_tryenter(f->uf_vfs_lockp)) {
1612 TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1613 f->uf_retry = 1;
1614 return (SFRC_FAIL);
1617 if (!f->uf_vfs_ufsfxp->fx_current)
1618 f->uf_vfs_ufsfxp->fx_current = f;
1620 mutex_exit(f->uf_vfs_lockp);
1622 f->uf_lf.lf_flags = 0;
1623 f->uf_lf.lf_lock = LOCKFS_ELOCK;
1624 f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1625 TRIVIA(("] "));
1626 return (SFRC_SUCCESS);
1629 static sfrc_t
1630 sf_found_trylck(ufs_failure_t *f)
1632 struct lockfs lockfs_status;
1634 TRIVIA(("[sf_found_trylck"));
1636 if (trylock_time_exceeded(f) > 0) {
1637 (void) set_state(f, UF_PANIC);
1638 TRIVIA((": failed] "));
1639 return (SFRC_FAIL);
1642 if (!get_lockfs_status(f, &lockfs_status)) {
1643 (void) set_state(f, UF_PANIC);
1644 TRIVIA((": failed] "));
1645 return (SFRC_FAIL);
1648 if (f->uf_lf_err == NO_ERROR)
1649 f->uf_lf.lf_key = lockfs_status.lf_key;
1651 if (!set_lockfs(f, &lockfs_status)) {
1652 (void) set_state(f, UF_PANIC);
1653 TRIVIA((": failed] "));
1654 return (SFRC_FAIL);
1656 TRIVIA(("] "));
1657 return (SFRC_SUCCESS);
1660 static sfrc_t
1661 sf_set_locked(ufs_failure_t *f)
1663 TRIVIA(("[sf_set_locked"));
1665 f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1667 #if defined(DEBUG)
1668 if (f->uf_flags & UFSFX_REPAIR_START)
1669 TRIVIA(("clearing UFSFX_REPAIR_START "));
1670 #endif /* DEBUG */
1672 f->uf_flags &= ~UFSFX_REPAIR_START;
1674 if (f->uf_s & UF_TRYLCK) {
1675 cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1676 fs_name(f), f->uf_panic_str);
1678 if (f->uf_flags & UFSFX_LCKONLY)
1679 cmn_err(CE_WARN, "Manual repair of %s required",
1680 fs_name(f));
1684 * just reset to current state
1686 #if defined(DEBUG)
1687 TRIVIA(("locked->locked "));
1688 #endif /* DEBUG */
1690 TRIVIA(("] "));
1691 return (SFRC_SUCCESS);
1694 static sfrc_t
1695 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1697 time_t toolong;
1698 extern time_t time;
1699 struct buf *bp = NULL;
1700 struct fs *dfs;
1701 time_t concerned, anxious;
1702 sfrc_t rc = SFRC_FAIL;
1703 ulong_t gb_size;
1705 TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1707 if (s & UF_LOCKED) {
1708 ASSERT(MUTEX_HELD(&f->uf_mutex));
1710 toolong =
1711 time > (ufsfx_tune.uft_too_long + f->uf_entered_tm);
1712 TRIVIA(("%stoolong", !toolong? "not": ""));
1713 HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1714 time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1716 if (f->uf_flags & UFSFX_LCKUMOUNT) {
1717 if (set_state(f, UF_UMOUNT)) {
1718 TRIVIA(("] "));
1719 rc = SFRC_SUCCESS;
1720 } else {
1721 TRIVIA((": failed] "));
1722 f->uf_retry = 1;
1724 return (rc);
1726 if (!toolong) {
1727 rc = SFRC_SUCCESS;
1728 } else {
1729 if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1730 cmn_err(CE_WARN, "%s repair of %s not started.",
1731 (f->uf_flags & UFSFX_LCKONLY) ?
1732 "Manual" : "Automatic", fs_name(f));
1734 f->uf_retry = ufsfx_tune.uft_long_err_period;
1735 } else {
1736 f->uf_retry = ufsfx_tune.uft_long_err_period;
1737 cmn_err(CE_WARN, "Repair of %s is not timely; "
1738 "operator attention is required.",
1739 fs_name(f));
1741 TRIVIA(("] "));
1742 return (rc);
1746 #if defined(DEBUG)
1747 else {
1748 ASSERT(s & UF_FIXING);
1750 #endif /* DEBUG */
1753 * get on disk superblock; force it to really
1754 * come from the disk
1756 (void) bfinval(f->uf_dev, 0);
1757 bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1758 if (bp) {
1759 bp->b_flags |= (B_STALE | B_AGE);
1760 dfs = bp->b_un.b_fs;
1763 if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1764 (dfs->fs_magic != MTB_UFS_MAGIC))) {
1765 TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1766 f->uf_retry = 1;
1767 goto out;
1770 /* fsck started but we haven't noticed yet? */
1771 if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1772 if (!set_state(f, UF_FIXING)) {
1773 TRIVIA((": failed]\n"));
1774 f->uf_retry = 1;
1775 goto out;
1779 /* fsck started but didn't succeed? */
1780 if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1781 TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1782 (void) set_state(f, UF_LOCKED);
1783 cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1784 f->uf_retry = ufsfx_tune.uft_long_err_period;
1785 goto out;
1788 gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1789 toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1791 /* fsck started but doesn't seem to be proceeding? */
1792 if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1793 if (time > f->uf_entered_tm + toolong) {
1795 cmn_err(CE_WARN,
1796 "Repair completion timeout exceeded on %s; "
1797 "manual fsck may be required", fs_name(f));
1798 f->uf_retry = ufsfx_tune.uft_long_err_period;
1802 concerned = f->uf_entered_tm + (toolong / 3);
1803 anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1805 if (time > concerned)
1806 pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1808 TRIVIA(("] "));
1810 out:
1811 if (bp)
1812 brelse(bp);
1814 return (rc);
1817 static sfrc_t
1818 sf_found_umount(ufs_failure_t *f)
1820 extern time_t time;
1821 sfrc_t rc = SFRC_FAIL;
1822 struct vfs *vfsp = f->uf_vfsp;
1823 struct ufsvfs *ufsvfsp = f->uf_ufsvfsp;
1824 int toolong = 0;
1825 int err = 0;
1827 TRIVIA(("[sf_found_umount"));
1829 toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1830 if (toolong) {
1831 TRIVIA((": unmount time limit exceeded] "));
1832 goto out;
1835 if (!vfsp || !ufsvfsp) { /* trivial case */
1836 TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1837 goto out;
1840 if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1841 TRIVIA((": !not error locked?"));
1842 err = EINVAL;
1843 goto out;
1846 /* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1847 if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1848 TRIVIA((": couldn't lock coveredvp"));
1849 err = EBUSY;
1850 goto out;
1853 if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1855 /* take note, but not many alternatives here */
1856 mutex_enter(&uf_stats.ufst_mutex);
1857 uf_stats.ufst_unmount_failures++;
1858 mutex_exit(&uf_stats.ufst_mutex);
1860 TRIVIA((": unmount failed] "));
1861 } else {
1862 cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1865 out:
1866 if (toolong || (err != EBUSY && err != EAGAIN))
1867 rc = set_state(f, UF_NOTFIX);
1869 TRIVIA(("] "));
1870 return (rc);
1873 static sfrc_t
1874 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1876 extern time_t time;
1877 sfrc_t rc = SFRC_FAIL;
1879 TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1880 act_name(a), state_name(s)));
1881 ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
1882 ASSERT(terminal_state(s));
1884 if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
1885 TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1886 return (rc);
1889 switch (a) {
1890 case UFA_SET:
1891 switch (s) {
1892 case UF_NOTFIX:
1893 case UF_FIXED:
1895 int need_lock_vfs;
1897 if (f->uf_ufsvfsp && f->uf_vfs_lockp)
1898 need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
1899 else
1900 need_lock_vfs = 0;
1902 if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
1903 TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1904 f->uf_retry = 1;
1905 break;
1908 f->uf_end_tm = time;
1909 f->uf_lf.lf_lock = LOCKFS_OLOCK;
1910 f->uf_retry = 0;
1912 if (f->uf_vfs_ufsfxp)
1913 f->uf_vfs_ufsfxp->fx_current = NULL;
1915 if (need_lock_vfs)
1916 mutex_exit(f->uf_vfs_lockp);
1918 cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
1919 "%s is now accessible", fs_name(f));
1921 if (s & UF_FIXED) {
1922 mutex_enter(&uf_stats.ufst_mutex);
1923 uf_stats.ufst_num_fixed++;
1924 mutex_exit(&uf_stats.ufst_mutex);
1926 (void) timeout(ufsfx_kill_fix_failure_thread,
1927 (void *)(ufsfx_tune.uft_short_err_period * hz),
1928 ufsfx_tune.uft_short_err_period * hz);
1929 rc = SFRC_SUCCESS;
1930 break;
1932 case UF_REPLICA:
1934 ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
1936 /* not actually a replica? */
1937 if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
1938 f->uf_vfs_ufsfxp->fx_current != f &&
1939 !terminal_state(
1940 f->uf_vfs_ufsfxp->fx_current->uf_s)) {
1942 f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
1943 f->uf_retry = 0;
1944 rc = SFRC_SUCCESS;
1945 } else {
1946 TRIVIA((": NULL fx_current]\n"));
1947 f->uf_retry = 1;
1950 break;
1952 default:
1953 rc = set_state(f, UF_PANIC);
1954 TRIVIA((": failed] "));
1955 break;
1957 break;
1959 case UFA_FOUND:
1961 * XXX de-allocate these after some period?
1962 * XXX or move to an historical list?
1963 * XXX or have an ioctl which reaps them?
1966 * For now, since we don't expect lots of failures
1967 * to occur (to the point of memory shortages),
1968 * just punt
1971 /* be sure we're not wasting cpu on old failures */
1972 if (f->uf_retry != 0) {
1973 mutex_enter(&uf_stats.ufst_mutex);
1974 uf_stats.ufst_cpu_waste++;
1975 mutex_exit(&uf_stats.ufst_mutex);
1976 f->uf_retry = 0;
1978 rc = SFRC_SUCCESS;
1979 break;
1981 default:
1982 (void) set_state(f, UF_PANIC);
1983 TRIVIA((": failed] "));
1984 break;
1987 TRIVIA(("] "));
1988 return (rc);
1991 static sfrc_t
1992 sf_panic(
1993 ufs_failure_t *f,
1994 ufsa_t a,
1995 ufs_failure_states_t s)
1997 sfrc_t rc = SFRC_FAIL;
1999 TRIVIA(("[sf_panic, action is %s, prev. state is %s",
2000 act_name(a), state_name(f->uf_s)));
2001 ASSERT(s & UF_PANIC);
2003 switch (a) {
2004 case UFA_SET:
2005 f->uf_retry = -ufsfx_tune.uft_short_err_period;
2006 rc = SFRC_SUCCESS;
2007 break;
2009 case UFA_FOUND:
2010 default:
2011 real_panic(f, " ");
2013 /* LINTED: warning: logical expression always true: op "||" */
2014 ASSERT(DEBUG);
2016 (void) set_state(f, UF_UMOUNT); /* XXX UF_NOTFIX? */
2018 break;
2021 TRIVIA(("] "));
2022 return (rc);
2026 * minimum state function
2028 static sfrc_t
2029 sf_minimum(
2030 ufs_failure_t *f,
2031 ufsa_t a, /* LINTED argument unused in function: ignored */
2032 ufs_failure_states_t ignored)
2034 sfrc_t rc = SFRC_FAIL;
2036 TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2038 switch (a) {
2039 case UFA_SET:
2040 f->uf_retry = 0;
2041 /* FALLTHROUGH */
2043 case UFA_FOUND:
2044 rc = SFRC_SUCCESS;
2045 break;
2047 default:
2048 (void) set_state(f, UF_PANIC);
2049 TRIVIA((": failed] "));
2050 break;
2053 TRIVIA(("] "));
2054 return (rc);
2057 static int
2058 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2060 ufsd_t *s;
2061 int valid;
2063 HIDEOUS(("[state_trans_valid"));
2065 if (from & to)
2066 return (1);
2068 s = get_state_desc(to);
2071 * extra test is necessary since we want UF_UNDEF = 0,
2072 * (to detect freshly allocated memory)
2073 * but can't check for that value with a bit test
2075 valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2077 HIDEOUS((": %svalid] ", valid? "": "in"));
2078 return (valid);
2081 static int
2082 terminal_state(ufs_failure_states_t state)
2084 ufsd_t *s;
2086 HIDEOUS(("[terminal_state"));
2088 s = get_state_desc(state);
2090 HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2091 return ((int)s->ud_attr.terminal);
2094 static void
2095 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2097 MINUTE(("[alloc_lockfs_comment"));
2098 ASSERT(MUTEX_HELD(&f->uf_mutex));
2101 * ufs_fiolfs expects a kmem_alloc'ed comment;
2102 * it frees the comment if the lock fails
2103 * or else when the lock is unlocked.
2106 f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2107 if (f->uf_lf.lf_comment) {
2108 char *from;
2109 size_t len;
2112 * use panic string if there's no previous comment
2113 * or if we're setting the error lock
2115 if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2116 lfp->lf_comlen <= 0)) {
2117 from = f->uf_panic_str;
2118 len = LOCKFS_MAXCOMMENTLEN;
2119 } else {
2120 from = lfp->lf_comment;
2121 len = lfp->lf_comlen;
2124 bcopy(from, f->uf_lf.lf_comment, len);
2125 f->uf_lf.lf_comlen = len;
2127 } else {
2128 f->uf_lf.lf_comlen = 0;
2130 MINUTE(("] "));
2133 static int
2134 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2136 int (*handle_lockfs_rc)(ufs_failure_t *);
2137 int rc;
2139 MINUTE(("[set_lockfs"));
2140 ASSERT(MUTEX_HELD(&f->uf_mutex));
2141 ASSERT(!vfs_lock_held(f->uf_vfsp));
2142 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2144 if (!f->uf_ufsvfsp) {
2145 MINUTE((": ufsvfsp is NULL]\n"));
2146 return (0);
2149 ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2151 if (!f->uf_ufsvfsp->vfs_root) {
2152 MINUTE((": vfs_root is NULL]\n"));
2153 return (0);
2156 alloc_lockfs_comment(f, lfp);
2157 f->uf_lf_err = 0;
2159 if (!LOCKFS_IS_ELOCK(lfp)) {
2160 lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2161 VN_HOLD(f->uf_ufsvfsp->vfs_root);
2162 f->uf_lf_err =
2163 ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2164 &f->uf_lf, /* from_user */ 0, /* from_log */ 0);
2165 VN_RELE(f->uf_ufsvfsp->vfs_root);
2168 handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2169 rc = handle_lockfs_rc(f);
2171 MINUTE(("] "));
2172 return (rc);
2175 static int
2176 lockfs_failure(ufs_failure_t *f)
2178 int error;
2179 ufs_failure_states_t s;
2181 TRIVIA(("[lockfs_failure"));
2182 ASSERT(MUTEX_HELD(&f->uf_mutex));
2184 if (!f->uf_ufsvfsp) {
2185 TRIVIA((": ufsvfsp is NULL]\n"));
2186 return (0);
2189 error = f->uf_lf_err;
2190 switch (error) {
2191 /* non-transient errors: */
2192 case EACCES: /* disk/in-core metadata reconciliation failed */
2193 case EPERM: /* inode reconciliation failed; incore inode changed? */
2194 case EIO: /* device is hard-locked or not responding */
2195 case EROFS: /* device is write-locked */
2196 case EDEADLK: /* can't lockfs; deadlock would result; */
2197 /* Swapping or saving accounting records */
2198 /* onto this fs can cause this errno. */
2200 MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2201 fs_name(f), lock_name(&f->uf_lf),
2202 err_name(error), error));
2205 * if can't get lock, then fallback to panic, unless
2206 * unless unmount was requested (although unmount will
2207 * probably fail if the lock failed, so we'll panic
2208 * anyway
2211 s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ?
2212 UF_UMOUNT: UF_PANIC;
2214 if (!set_state(f, s)) {
2215 real_panic(f, " ");
2216 /*NOTREACHED*/
2217 break;
2219 break;
2222 case EBUSY:
2223 case EAGAIN:
2225 f->uf_retry = ufsfx_tune.uft_short_err_period;
2226 if (curthread->t_flag & T_DONTPEND) {
2227 curthread->t_flag &= ~T_DONTPEND;
2229 } else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
2230 ufs_failure_states_t state;
2232 * if we didn't know that the fix had started,
2233 * take note
2235 state = error == EBUSY? UF_LOCKED: UF_FIXING;
2236 if (!set_state(f, state)) {
2237 TRIVIA((": failed] "));
2238 return (0);
2241 break;
2243 default: /* some other non-fatal error */
2244 MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2245 lock_name(&f->uf_lf), fs_name(f),
2246 err_name(f->uf_lf_err), f->uf_lf_err));
2248 f->uf_retry = ufsfx_tune.uft_short_err_period;
2249 break;
2251 case EINVAL: /* unmounted? */
2252 (void) set_state(f, UF_NOTFIX);
2253 break;
2255 TRIVIA(("] "));
2256 return (1);
2259 static int
2260 lockfs_success(ufs_failure_t *f)
2262 TRIVIA(("[lockfs_success"));
2263 ASSERT(MUTEX_HELD(&f->uf_mutex));
2265 if (!f->uf_ufsvfsp) {
2266 TRIVIA((": ufsvfsp is NULL]\n"));
2267 return (0);
2270 switch (f->uf_lf.lf_lock) {
2271 case LOCKFS_ELOCK: /* error lock worked */
2273 if (!set_state(f, UF_LOCKED)) {
2274 TRIVIA((": failed] "));
2275 return (0);
2277 break;
2279 case LOCKFS_ULOCK: /* unlock worked */
2281 * how'd we get here?
2282 * This should be done from fsck's unlock,
2283 * not from this thread's context.
2285 cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2286 ufsfx_unlockfs(f->uf_ufsvfsp);
2287 break;
2289 default:
2290 if (!set_state(f, UF_NOTFIX)) {
2291 TRIVIA((": failed] "));
2292 return (0);
2294 break;
2296 TRIVIA(("] "));
2297 return (1);
2301 * when fsck is running it puts its pid into the lockfs
2302 * comment structure, prefaced by PIDSTR
2304 const char *PIDSTR = "[pid:";
2305 static int
2306 fsck_active(ufs_failure_t *f)
2308 char *cp;
2309 int i, found, errlocked;
2310 size_t comlen;
2311 const int PIDSTRLEN = (int)strlen(PIDSTR);
2312 struct ulockfs *ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2314 TRIVIA(("[fsck_active"));
2316 ASSERT(f);
2317 ASSERT(f->uf_s & UF_FIXING);
2318 ASSERT(MUTEX_HELD(&f->uf_mutex));
2319 ASSERT(f->uf_ufsvfsp);
2320 ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2321 ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2323 mutex_enter(&ulp->ul_lock);
2324 cp = ulp->ul_lockfs.lf_comment;
2325 comlen = ulp->ul_lockfs.lf_comlen;
2326 errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2327 mutex_exit(&ulp->ul_lock);
2329 if (!cp || comlen == 0) {
2330 TRIVIA((": null comment or comlen <= 0, found:0]"));
2331 return (0);
2334 for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2335 found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2337 TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2338 return (errlocked & found);
2341 static const char unknown_fs[] = "<unknown fs>";
2342 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2343 static const char mutated_vfs_bufp[] = "<mutated vfs_bufp, unknown fs>";
2344 static const char mutated_vfs_fs[] = "<mutated vfs_fs, unknown fs>";
2346 static char *
2347 fs_name(ufs_failure_t *f)
2349 HIDEOUS(("[fs_name"));
2350 ASSERT(MUTEX_HELD(&f->uf_mutex));
2352 if (!f) {
2353 HIDEOUS((": failure ptr is NULL]\n"));
2354 return ((char *)null_failure);
2357 if (f->uf_fsname[0] != '\0') {
2358 HIDEOUS((": return (uf_fsname)]\n"));
2359 return (f->uf_fsname);
2362 if (MUTEX_HELD(f->uf_vfs_lockp)) {
2363 if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2364 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2365 (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2366 return ((char *)mutated_vfs_bufp);
2368 if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2369 HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2370 (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2371 return ((char *)mutated_vfs_fs);
2373 if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2374 *f->uf_fs->fs_fsmnt != '\0') {
2375 HIDEOUS((": return (fs_fsmnt)]\n"));
2376 return (f->uf_fs->fs_fsmnt);
2380 HIDEOUS((": unknown file system]\n"));
2381 return ((char *)unknown_fs);
2384 #if defined(DEBUG)
2385 static char *
2386 lock_name(struct lockfs *lfp)
2388 struct lock_description *l;
2389 char *lname;
2391 HIDEOUS(("[lock_name"));
2393 lname = lock_desc[0].ld_name;
2394 for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2395 if (lfp && lfp->lf_lock == l->ld_type) {
2396 lname = l->ld_name;
2397 break;
2400 HIDEOUS(("]"));
2401 return (lname);
2404 static char *
2405 state_name(ufs_failure_states_t state)
2407 ufsd_t *s;
2409 HIDEOUS(("[state_name"));
2411 s = get_state_desc(state);
2413 HIDEOUS(("]"));
2414 return (s->ud_name);
2417 static char *
2418 err_name(int error)
2420 struct error_description *e;
2422 HIDEOUS(("[err_name"));
2424 for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2425 if (error == e->ed_errno) {
2426 HIDEOUS(("]"));
2427 return (e->ed_name);
2430 HIDEOUS(("]"));
2431 return (err_desc[0].ed_name);
2434 static char *
2435 act_name(ufsa_t action)
2437 struct action_description *a;
2439 HIDEOUS(("[act_name"));
2441 for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2442 if (action == a->ad_v) {
2443 HIDEOUS(("]"));
2444 return (a->ad_name);
2447 HIDEOUS(("]"));
2448 return (act_desc[0].ad_name);
2452 * dump failure list
2454 static void
2455 dump_uf_list(char *msg)
2457 ufs_failure_t *f;
2458 int i;
2459 int list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2461 if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2462 printf("dump_uf_list: couldn't get list lock\n");
2463 return;
2466 if (msg) {
2467 printf("\n%s", msg);
2469 printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2470 ufs_fix.uq_lowat, ufs_fix.uq_ne);
2472 mutex_enter(&uf_stats.ufst_mutex);
2473 printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2474 printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2475 printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2476 printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2477 printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2478 uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2479 mutex_exit(&uf_stats.ufst_mutex);
2481 for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2483 if (!mutex_tryenter(&f->uf_mutex)) {
2484 printf("%d.\t\"skipped - try enter failed\"\n", i);
2485 continue;
2488 dump_uf(f, i);
2490 mutex_exit(&f->uf_mutex);
2493 printf("\n");
2495 if (!list_was_locked)
2496 mutex_exit(&ufs_fix.uq_mutex);
2499 static void
2500 dump_uf(ufs_failure_t *f, int i)
2502 if (!f) {
2503 printf("dump_uf: NULL failure record\n");
2504 return;
2507 printf("%d.\t\"%s\" is %s.\n",
2508 i, fs_name(f), state_name(f->uf_s));
2509 printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2510 printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2511 (void *)f->uf_next, (void *)f->uf_prev);
2513 if (f->uf_orig)
2514 printf("\tOriginal failure: 0x%p \"%s\"\n",
2515 (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2517 printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2518 (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2519 printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2520 printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2522 if (f->uf_bp)
2523 printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2524 else
2525 printf("\n");
2527 printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2528 f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2530 printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2531 f->uf_flags & UFSFX_LCKONLY? "\"lock only\" " : "",
2532 f->uf_flags & UFSFX_LCKUMOUNT? "\"lock+unmount\" " : "",
2533 f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" " : "",
2534 f->uf_flags == 0? "<none>" : "");
2536 printf("\tRetry: %ld seconds\n", f->uf_retry);
2538 printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2539 lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err);
2542 #endif /* DEBUG */
2545 * returns # of ufs_failures in a non-terminal state on queue
2546 * used to coordinate with hlock thread (see ufs_thread.c)
2547 * and to determine when the error lock thread may exit
2551 ufsfx_get_failure_qlen(void)
2553 ufs_failure_t *f;
2554 ufsd_t *s;
2555 int qlen = 0;
2557 MINUTE(("[ufsfx_get_failure_qlen"));
2559 if (!mutex_tryenter(&ufs_fix.uq_mutex))
2560 return (-1);
2563 * walk down failure list
2566 for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2568 if (!mutex_tryenter(&f->uf_mutex))
2569 continue;
2571 s = get_state_desc(f->uf_s);
2573 if (s->ud_attr.terminal) {
2574 mutex_exit(&f->uf_mutex);
2575 continue;
2578 MINUTE((": found: %s, \"%s: %s\"\n",
2579 fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2581 qlen++;
2582 mutex_exit(&f->uf_mutex);
2585 mutex_exit(&ufs_fix.uq_mutex);
2587 MINUTE((": qlen=%d]\n", qlen));
2589 return (qlen);
2593 * timeout routine
2594 * called to shutdown fix failure thread and server daemon
2596 static void
2597 ufsfx_kill_fix_failure_thread(void *arg)
2599 clock_t odelta = (clock_t)arg;
2600 int qlen;
2602 MAJOR(("[ufsfx_kill_fix_failure_thread"));
2604 qlen = ufsfx_get_failure_qlen();
2606 if (qlen < 0) {
2607 clock_t delta;
2609 delta = odelta << 1;
2610 if (delta <= 0)
2611 delta = INT_MAX;
2613 (void) timeout(ufsfx_kill_fix_failure_thread,
2614 (void *)delta, delta);
2615 MAJOR((": rescheduled"));
2617 } else if (qlen == 0) {
2618 ufs_thread_exit(&ufs_fix);
2619 MAJOR((": killed"));
2622 * else
2623 * let timeout expire
2625 MAJOR(("]\n"));