/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/acct.h>
#include <sys/dnlc.h>
#include <sys/swap.h>

#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_panic.h>
#include <sys/fs/ufs_mount.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_quota.h>
#include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
#include <sys/errno.h>
#include <sys/sysinfo.h>

#include <vm/hat.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <vm/anon.h>
extern struct vnode *common_specvp(struct vnode *vp);

/* error lock status */
#define UN_ERRLCK	(-1)
#define SET_ERRLCK	1
#define RE_ERRLCK	2
#define NO_ERRLCK	0

/*
 * Index to be used in TSD for storing lockfs data
 */
uint_t ufs_lockfs_key;

typedef struct _ulockfs_info {
    struct _ulockfs_info *next;
    struct ulockfs *ulp;
    uint_t flags;
} ulockfs_info_t;

#define ULOCK_INFO_FALLOCATE	0x00000001	/* fallocate thread */
/*
 * Check in TSD whether we are already doing any VOP on this filesystem
 */
#define IS_REC_VOP(found, head, ulp, free)	\
{						\
    ulockfs_info_t *_curr;			\
						\
    for (found = 0, free = NULL, _curr = head;	\
        _curr != NULL; _curr = _curr->next) {	\
        if ((free == NULL) &&			\
            (_curr->ulp == NULL))		\
            free = _curr;			\
        if (_curr->ulp == ulp) {		\
            found = 1;				\
            break;				\
        }					\
    }						\
}
/*
 * Get the lockfs data from TSD so that lockfs handles the recursive VOP
 * properly
 */
#define SEARCH_ULOCKFSP(head, ulp, info)	\
{						\
    ulockfs_info_t *_curr;			\
						\
    for (_curr = head; _curr != NULL;		\
        _curr = _curr->next) {			\
        if (_curr->ulp == ulp) {		\
            break;				\
        }					\
    }						\
						\
    info = _curr;				\
}
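
/*
 * Illustrative sketch (editorial addition, not in the original source):
 * how the two macros above cooperate.  A vnode operation entering the
 * lockfs protocol pulls the per-thread list out of TSD and asks
 * IS_REC_VOP whether this thread already holds a record for this
 * ulockfs, i.e. whether this is a recursive VOP on the same filesystem.
 * The variable names here are hypothetical:
 *
 *	ulockfs_info_t *head, *free;
 *	int rec_vop;
 *
 *	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
 *	IS_REC_VOP(rec_vop, head, ulp, free);
 *	if (rec_vop)
 *		return (0);	(already accounted for; skip protocol)
 *
 * This is exactly the pattern ufs_lockfs_begin() below uses.
 */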
/*
 * Validate lockfs request
 */
static int
ufs_getlfd(
    struct lockfs *lockfsp,	/* new lock request */
    struct lockfs *ul_lockfsp)	/* old lock state */
{
    int error = 0;

    /*
     * no input flags defined
     */
    if (lockfsp->lf_flags != 0) {
        error = EINVAL;
        goto errout;
    }

    /*
     * check key
     */
    if (!LOCKFS_IS_ULOCK(ul_lockfsp))
        if (lockfsp->lf_key != ul_lockfsp->lf_key) {
            error = EINVAL;
            goto errout;
        }

    lockfsp->lf_key = ul_lockfsp->lf_key + 1;

errout:
    return (error);
}
/*
 * ufs_checkaccton
 *	check if accounting is turned on for this fs
 */
int
ufs_checkaccton(struct vnode *vp)
{
    if (acct_fs_in_use(vp))
        return (EDEADLK);
    return (0);
}

/*
 * ufs_checkswapon
 *	check if local swapping is to a file on this fs
 */
int
ufs_checkswapon(struct vnode *vp)
{
    struct swapinfo *sip;

    mutex_enter(&swapinfo_lock);
    for (sip = swapinfo; sip; sip = sip->si_next)
        if (sip->si_vp->v_vfsp == vp->v_vfsp) {
            mutex_exit(&swapinfo_lock);
            return (EDEADLK);
        }
    mutex_exit(&swapinfo_lock);
    return (0);
}
/*
 * ufs_freeze
 *	pend future accesses for current lock and desired lock
 */
void
ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
{
    /*
     * set to new lock type
     */
    ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
    ulp->ul_lockfs.lf_key = lockfsp->lf_key;
    ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
    ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;

    ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
}
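
/*
 * Worked example (editorial note, not in the original source): because
 * ul_fs_lock is set to (1 << lf_lock), each lock type occupies its own
 * bit.  If lockfsp->lf_lock is LOCKFS_WLOCK, ul_fs_lock ends up with
 * only the ULOCKFS_WLOCK bit set, which is what predicates such as
 * ULOCKFS_IS_WLOCK(ulp) and the per-VOP masks passed to
 * ufs_check_lockfs() below test against.
 */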
/*
 * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
 * starting the ufs_quiesce() protocol and decrement it only when the file
 * system no longer has to be in a quiescent state.  This allows ufs_pageio()
 * to detect that another thread wants to quiesce a file system.  See more
 * comments in ufs_pageio().
 */
ulong_t ufs_quiesce_pend = 0;
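
/*
 * Illustrative sketch (editorial addition, not in the original source):
 * the protocol a caller of ufs_quiesce() is expected to follow, which
 * mirrors what ufs__fiolfs() below actually does -- take ul_lock,
 * announce the quiesce, then wait out active vnops:
 *
 *	mutex_enter(&ulp->ul_lock);
 *	atomic_inc_ulong(&ufs_quiesce_pend);
 *	...
 *	error = ufs_quiesce(ulp);
 *	...
 *	atomic_dec_ulong(&ufs_quiesce_pend);
 *	mutex_exit(&ulp->ul_lock);
 */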
/*
 * ufs_quiesce
 *	wait for outstanding accesses to finish
 */
int
ufs_quiesce(struct ulockfs *ulp)
{
    int error = 0;
    ulockfs_info_t *head;
    ulockfs_info_t *info;
    klwp_t *lwp = ttolwp(curthread);

    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);

    /*
     * We have to keep /proc away from stopping us after we applied
     * the softlock but before we got a chance to clear it again.
     * prstop() may pagefault and become stuck on the softlock still
     * pending.
     */
    if (lwp != NULL)
        lwp->lwp_nostop++;

    /*
     * Set a softlock to suspend future ufs_vnops so that
     * this lockfs request will not be starved
     */
    ULOCKFS_SET_SLOCK(ulp);
    ASSERT(ufs_quiesce_pend);

    /* check if there are any outstanding ufs vnodeops calls */
    while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) {
        /*
         * use the timed version of cv_wait_sig() to make sure we don't
         * miss a wake up call from ufs_pageio() when it doesn't use
         * ul_lock.
         *
         * when a fallocate thread comes in, the only way it returns
         * from this function is if there are no other vnode operations
         * going on (remember fallocate threads are tracked using
         * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread
         * hasn't already grabbed the fs write lock.
         */
        if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
            if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp))
                goto out;
        }
        if (!cv_reltimedwait_sig(&ulp->ul_cv, &ulp->ul_lock, hz,
            TR_CLOCK_TICK)) {
            error = EINTR;
            goto out;
        }
    }

out:
    /*
     * unlock the soft lock
     */
    ULOCKFS_CLR_SLOCK(ulp);

    if (lwp != NULL)
        lwp->lwp_nostop--;

    return (error);
}
/*
 * ufs_flush_inode
 */
int
ufs_flush_inode(struct inode *ip, void *arg)
{
    int error;
    int saverror = 0;

    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * asynchronously push all the dirty pages
     */
    if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
        (error != EAGAIN))
        saverror = error;
    /*
     * wait for io and discard all mappings
     */
    if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
        saverror = error;

    if (ITOV(ip)->v_type == VDIR) {
        dnlc_dir_purge(&ip->i_danchor);
    }

    return (saverror);
}
/*
 * ufs_flush
 *	Flush everything that is currently dirty; this includes invalidating
 *	any mappings.
 */
int
ufs_flush(struct vfs *vfsp)
{
    int error;
    int saverror = 0;
    struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    struct fs *fs = ufsvfsp->vfs_fs;
    int tdontblock = 0;

    ASSERT(vfs_lock_held(vfsp));

    /*
     * purge dnlc
     */
    (void) dnlc_purge_vfsp(vfsp, 0);

    /*
     * drain the delete and idle threads
     */
    ufs_delete_drain(vfsp, 0, 0);
    ufs_idle_drain(vfsp);

    /*
     * flush and invalidate quota records
     */
    (void) qsync(ufsvfsp);

    /*
     * flush w/invalidate the inodes for vfsp
     */
    if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
        saverror = error;

    /*
     * synchronously flush superblock and summary info
     */
    if (fs->fs_ronly == 0 && fs->fs_fmod) {
        fs->fs_fmod = 0;
        TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
    }

    /*
     * flush w/invalidate block device pages and buf cache
     */
    if ((error = fop_putpage(common_specvp(ufsvfsp->vfs_devvp),
        (offset_t)0, 0, B_INVAL, CRED(), NULL)) > 0)
        saverror = error;

    (void) bflush((dev_t)vfsp->vfs_dev);
    (void) bfinval((dev_t)vfsp->vfs_dev, 0);

    /*
     * drain the delete and idle threads again
     */
    ufs_delete_drain(vfsp, 0, 0);
    ufs_idle_drain(vfsp);

    /*
     * play with the clean flag
     */
    if (saverror == 0)
        ufs_checkclean(vfsp);
    /*
     * Flush any outstanding transactions and roll the log, but only if we
     * are supposed to, i.e. LDL_NOROLL is not set.  We cannot simply check
     * fs_ronly here, since fsck may also use this code to roll the log on
     * a read-only filesystem, e.g. root during early stages of boot; in
     * that case, once it is doing more than a sanity check, it clears
     * LDL_NOROLL before getting here.  In addition we assert that the
     * deltamap does not contain any deltas in case LDL_NOROLL is set,
     * since that is not supposed to happen.
     */
    if (TRANS_ISTRANS(ufsvfsp)) {
        ml_unit_t *ul = ufsvfsp->vfs_log;
        mt_map_t *mtm = ul->un_deltamap;

        if (ul->un_flags & LDL_NOROLL) {
            ASSERT(mtm->mtm_nme == 0);
        } else {
            /*
             * Do not set T_DONTBLOCK if there is a
             * transaction opened by caller.
             */
            if (curthread->t_flag & T_DONTBLOCK)
                tdontblock = 1;
            else
                curthread->t_flag |= T_DONTBLOCK;

            TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH,
                TOP_COMMIT_SIZE, &error);

            if (!error) {
                TRANS_END_SYNC(ufsvfsp, &saverror,
                    TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE);
            }

            if (tdontblock == 0)
                curthread->t_flag &= ~T_DONTBLOCK;

            logmap_roll_dev(ufsvfsp->vfs_log);
        }
    }

    return (saverror);
}
/*
 * ufs_thaw_wlock
 *	special processing when thawing down to wlock
 */
static int
ufs_thaw_wlock(struct inode *ip, void *arg)
{
    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * iupdat refuses to clear flags if the fs is read only.  The fs
     * may become read/write during the lock and we wouldn't want
     * these inodes being written to disk.  So clear the flags.
     */
    rw_enter(&ip->i_contents, RW_WRITER);
    ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
    rw_exit(&ip->i_contents);

    /*
     * pages are mlocked -- fail wlock
     */
    if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
        return (EBUSY);

    return (0);
}
/*
 * ufs_thaw_hlock
 *	special processing when thawing down to hlock or elock
 */
static int
ufs_thaw_hlock(struct inode *ip, void *arg)
{
    struct vnode *vp = ITOV(ip);

    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * blow away all pages - even if they are mlocked
     */
    do {
        (void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
    } while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
    rw_enter(&ip->i_contents, RW_WRITER);
    ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
    rw_exit(&ip->i_contents);

    return (0);
}
/*
 * ufs_thaw
 *	thaw file system lock down to current value
 */
int
ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
{
    int error = 0;
    int noidel = (int)(ulp->ul_flag & ULOCKFS_NOIDEL);

    /*
     * if wlock or hlock or elock
     */
    if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
        ULOCKFS_IS_ELOCK(ulp)) {

        /*
         * don't keep access times
         * don't free deleted files
         * if superblock writes are allowed, limit them to me for now
         */
        ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
        if (ulp->ul_sbowner != (kthread_id_t)-1)
            ulp->ul_sbowner = curthread;

        /*
         * wait for writes for deleted files and superblock updates
         */
        (void) ufs_flush(vfsp);

        /*
         * now make sure the quota file is up-to-date
         *	expensive; but effective
         */
        error = ufs_flush(vfsp);

        /*
         * no one can write the superblock
         */
        ulp->ul_sbowner = (kthread_id_t)-1;

        /*
         * special processing for wlock/hlock/elock
         */
        if (ULOCKFS_IS_WLOCK(ulp)) {
            if (error)
                goto errout;
            error = bfinval(ufsvfsp->vfs_dev, 0);
            if (error)
                goto errout;
            error = ufs_scan_inodes(0, ufs_thaw_wlock,
                (void *)ufsvfsp, ufsvfsp);
            if (error)
                goto errout;
        }
        if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
            error = 0;
            (void) ufs_scan_inodes(0, ufs_thaw_hlock,
                (void *)ufsvfsp, ufsvfsp);
            (void) bfinval(ufsvfsp->vfs_dev, 1);
        }
    } else {

        /*
         * okay to keep access times
         * okay to free deleted files
         * okay to write the superblock
         */
        ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
        ulp->ul_sbowner = NULL;

        /*
         * flush in case deleted files are in memory
         */
        if (noidel) {
            if (error = ufs_flush(vfsp))
                goto errout;
        }
    }

errout:
    cv_broadcast(&ulp->ul_cv);
    return (error);
}
/*
 * ufs_reconcile_fs
 *	reconcile incore superblock with ondisk superblock
 */
static int
ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
{
    struct fs *mfs;	/* in-memory superblock */
    struct fs *dfs;	/* on-disk superblock */
    struct buf *bp;	/* on-disk superblock buf */
    int needs_unlock;
    char finished_fsclean;

    mfs = ufsvfsp->vfs_fs;

    /*
     * get the on-disk copy of the superblock
     */
    bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
    bp->b_flags |= (B_STALE|B_AGE);
    if (bp->b_flags & B_ERROR) {
        brelse(bp);
        return (EIO);
    }
    dfs = bp->b_un.b_fs;

    /* error locks may only unlock after the fs has been made consistent */
    if (errlck == UN_ERRLCK) {
        if (dfs->fs_clean == FSFIX) {	/* being repaired */
            brelse(bp);
            return (EAGAIN);
        }
        /* repair not yet started? */
        finished_fsclean = TRANS_ISTRANS(ufsvfsp) ? FSLOG : FSCLEAN;
        if (dfs->fs_clean != finished_fsclean) {
            brelse(bp);
            return (EBUSY);
        }
    }

    /*
     * if superblock has changed too much, abort
     */
    if ((mfs->fs_sblkno != dfs->fs_sblkno) ||
        (mfs->fs_cblkno != dfs->fs_cblkno) ||
        (mfs->fs_iblkno != dfs->fs_iblkno) ||
        (mfs->fs_dblkno != dfs->fs_dblkno) ||
        (mfs->fs_cgoffset != dfs->fs_cgoffset) ||
        (mfs->fs_cgmask != dfs->fs_cgmask) ||
        (mfs->fs_bsize != dfs->fs_bsize) ||
        (mfs->fs_fsize != dfs->fs_fsize) ||
        (mfs->fs_frag != dfs->fs_frag) ||
        (mfs->fs_bmask != dfs->fs_bmask) ||
        (mfs->fs_fmask != dfs->fs_fmask) ||
        (mfs->fs_bshift != dfs->fs_bshift) ||
        (mfs->fs_fshift != dfs->fs_fshift) ||
        (mfs->fs_fragshift != dfs->fs_fragshift) ||
        (mfs->fs_fsbtodb != dfs->fs_fsbtodb) ||
        (mfs->fs_sbsize != dfs->fs_sbsize) ||
        (mfs->fs_nindir != dfs->fs_nindir) ||
        (mfs->fs_nspf != dfs->fs_nspf) ||
        (mfs->fs_trackskew != dfs->fs_trackskew) ||
        (mfs->fs_cgsize != dfs->fs_cgsize) ||
        (mfs->fs_ntrak != dfs->fs_ntrak) ||
        (mfs->fs_nsect != dfs->fs_nsect) ||
        (mfs->fs_spc != dfs->fs_spc) ||
        (mfs->fs_cpg != dfs->fs_cpg) ||
        (mfs->fs_ipg != dfs->fs_ipg) ||
        (mfs->fs_fpg != dfs->fs_fpg) ||
        (mfs->fs_postblformat != dfs->fs_postblformat) ||
        (mfs->fs_magic != dfs->fs_magic)) {
        brelse(bp);
        return (EACCES);
    }
    if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
        if (mfs->fs_clean == FSLOG) {
            brelse(bp);
            return (EACCES);
        }

    /*
     * get new summary info
     */
    if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
        brelse(bp);
        return (EIO);
    }

    /*
     * release old summary info and update in-memory superblock
     */
    kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
    mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */

    /*
     * update fields allowed to change
     */
    mfs->fs_size = dfs->fs_size;
    mfs->fs_dsize = dfs->fs_dsize;
    mfs->fs_ncg = dfs->fs_ncg;
    mfs->fs_minfree = dfs->fs_minfree;
    mfs->fs_rotdelay = dfs->fs_rotdelay;
    mfs->fs_rps = dfs->fs_rps;
    mfs->fs_maxcontig = dfs->fs_maxcontig;
    mfs->fs_maxbpg = dfs->fs_maxbpg;
    mfs->fs_csmask = dfs->fs_csmask;
    mfs->fs_csshift = dfs->fs_csshift;
    mfs->fs_optim = dfs->fs_optim;
    mfs->fs_csaddr = dfs->fs_csaddr;
    mfs->fs_cssize = dfs->fs_cssize;
    mfs->fs_ncyl = dfs->fs_ncyl;
    mfs->fs_cstotal = dfs->fs_cstotal;
    mfs->fs_reclaim = dfs->fs_reclaim;

    if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
        mfs->fs_reclaim &= ~FS_RECLAIM;
        mfs->fs_reclaim |= FS_RECLAIMING;
        ufs_thread_start(&ufsvfsp->vfs_reclaim,
            ufs_thread_reclaim, vfsp);
    }

    /* XXX What to do about sparecon? */

    /* XXX need to copy volume label */

    /*
     * ondisk clean flag overrides inmemory clean flag iff == FSBAD
     * or if error-locked and ondisk is now clean
     */
    needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
    if (needs_unlock)
        mutex_enter(&ufsvfsp->vfs_lock);

    if (errlck == UN_ERRLCK) {
        if (finished_fsclean == dfs->fs_clean)
            mfs->fs_clean = finished_fsclean;
        else
            mfs->fs_clean = FSBAD;
        mfs->fs_state = FSOKAY - dfs->fs_time;
    }

    if (FSOKAY != dfs->fs_state + dfs->fs_time ||
        (dfs->fs_clean == FSBAD))
        mfs->fs_clean = FSBAD;

    if (needs_unlock)
        mutex_exit(&ufsvfsp->vfs_lock);

    brelse(bp);

    return (0);
}
/*
 * ufs_reconcile_inode
 *	reconcile ondisk inode with incore inode
 */
static int
ufs_reconcile_inode(struct inode *ip, void *arg)
{
    int i;
    int ndaddr;
    int niaddr;
    struct dinode *dp;	/* ondisk inode */
    struct buf *bp = NULL;
    uid_t d_uid;
    gid_t d_gid;
    int error = 0;
    struct fs *fs;

    /*
     * not an inode we care about
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    fs = ip->i_fs;

    /*
     * Inode reconciliation fails: we made the filesystem quiescent
     * and we did a ufs_flush() before calling ufs_reconcile_inode()
     * and thus the inode should not have been changed in between.
     * Any discrepancies indicate a logic error and a pretty
     * significant run-state inconsistency we should complain about.
     */
    if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
        cmn_err(CE_WARN, "%s: Inode reconciliation failed for "
            "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
        return (EINVAL);
    }

    /*
     * get the dinode
     */
    bp = UFS_BREAD(ip->i_ufsvfs,
        ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
        (int)fs->fs_bsize);
    if (bp->b_flags & B_ERROR) {
        brelse(bp);
        return (EIO);
    }
    dp = bp->b_un.b_dino;
    dp += itoo(fs, ip->i_number);

    /*
     * handle Sun's implementation of EFT
     */
    d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
    d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (gid_t)dp->di_sgid;

    rw_enter(&ip->i_contents, RW_WRITER);

    /*
     * some fields are not allowed to change
     */
    if ((ip->i_mode != dp->di_mode) ||
        (ip->i_gen != dp->di_gen) ||
        (ip->i_uid != d_uid) ||
        (ip->i_gid != d_gid)) {
        error = EACCES;
        goto out;
    }

    /*
     * and some are allowed to change
     */
    ip->i_size = dp->di_size;
    ip->i_ic.ic_flags = dp->di_ic.ic_flags;
    ip->i_blocks = dp->di_blocks;
    ip->i_nlink = dp->di_nlink;
    if (ip->i_flag & IFASTSYMLNK) {
        ndaddr = 1;
        niaddr = 0;
    } else {
        ndaddr = NDADDR;
        niaddr = NIADDR;
    }
    for (i = 0; i < ndaddr; ++i)
        ip->i_db[i] = dp->di_db[i];
    for (i = 0; i < niaddr; ++i)
        ip->i_ib[i] = dp->di_ib[i];

out:
    rw_exit(&ip->i_contents);
    brelse(bp);
    return (error);
}
/*
 * ufs_reconcile
 *	reconcile ondisk superblock/inodes with any incore
 */
static int
ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
{
    int error = 0;

    /*
     * get rid of as much inmemory data as possible
     */
    (void) ufs_flush(vfsp);

    /*
     * reconcile the superblock and inodes
     */
    if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
        return (error);
    if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
        return (error);

    /*
     * allocation blocks may be incorrect; get rid of them
     */
    (void) ufs_flush(vfsp);

    return (error);
}
/*
 * File system locking
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
    return (ufs__fiolfs(vp, lockfsp, /* from_user */ 1, from_log));
}

/* kernel-internal interface, also used by fix-on-panic */
int
ufs__fiolfs(
    struct vnode *vp,
    struct lockfs *lockfsp,
    int from_user,
    int from_log)
{
    struct ulockfs *ulp;
    struct lockfs lfs;
    int error;
    struct vfs *vfsp;
    struct ufsvfs *ufsvfsp;
    int errlck = NO_ERRLCK;
    int poll_events = POLLPRI;
    extern struct pollhead ufs_pollhd;
    ulockfs_info_t *head;
    ulockfs_info_t *info;
    int signal = 0;

    /* check valid lock type */
    if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
        return (EINVAL);

    if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
        return (EIO);

    vfsp = vp->v_vfsp;

    if (vfsp->vfs_flag & VFS_UNMOUNTED) /* has been unmounted */
        return (EIO);

    /* take the lock and check again */
    vfs_lock_wait(vfsp);
    if (vfsp->vfs_flag & VFS_UNMOUNTED) {
        vfs_unlock(vfsp);
        return (EIO);
    }

    /*
     * Can't wlock or ro/elock fs with accounting or local swap file.
     * We need to check for this before we grab the ul_lock to avoid
     * deadlocks with the accounting framework.
     */
    if ((LOCKFS_IS_WLOCK(lockfsp) || LOCKFS_IS_ELOCK(lockfsp) ||
        LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
        if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
            vfs_unlock(vfsp);
            return (EDEADLK);
        }
    }

    ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    ulp = &ufsvfsp->vfs_ulockfs;
    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);
    /*
     * Suspend both the reclaim thread and the delete thread.
     * This must be done outside the lockfs locking protocol.
     */
    ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
    ufs_thread_suspend(&ufsvfsp->vfs_delete);

    mutex_enter(&ulp->ul_lock);
    atomic_inc_ulong(&ufs_quiesce_pend);

    /*
     * Quit if there is another lockfs request in progress
     * that is waiting for existing ufs_vnops to complete.
     */
    if (ULOCKFS_IS_BUSY(ulp)) {
        error = EBUSY;
        goto errexit;
    }

    /* cannot unlock or downgrade a hard-lock */
    if (ULOCKFS_IS_HLOCK(ulp)) {
        error = EIO;
        goto errexit;
    }

    /* an error lock may only be unlocked or relocked */
    if (ULOCKFS_IS_ELOCK(ulp)) {
        if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
            error = EBUSY;
            goto errexit;
        }
    }

    /*
     * a read-only error lock may only be upgraded to an
     * error lock or hard lock
     */
    if (ULOCKFS_IS_ROELOCK(ulp)) {
        if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
            error = EBUSY;
            goto errexit;
        }
    }

    /*
     * until read-only error locks are fully implemented
     * just return EINVAL
     */
    if (LOCKFS_IS_ROELOCK(lockfsp)) {
        error = EINVAL;
        goto errexit;
    }

    /*
     * an error lock may only be applied if the file system is
     * unlocked or already error locked.
     * (this is to prevent the case where a fs gets changed out from
     * underneath a fs that is locked for backup,
     * that is, name/delete/write-locked.)
     */
    if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
        !ULOCKFS_IS_ROELOCK(ulp)) &&
        (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
        error = EBUSY;
        goto errexit;
    }

    /* get and validate the input lockfs request */
    if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
        goto errexit;
    /*
     * save current ulockfs struct
     */
    bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));

    /*
     * Freeze the file system (pend future accesses)
     */
    ufs_freeze(ulp, lockfsp);

    /*
     * Set locking in progress because ufs_quiesce may free the
     * ul_lock mutex.
     */
    ULOCKFS_SET_BUSY(ulp);
    /* update the ioctl copy */
    LOCKFS_SET_BUSY(&ulp->ul_lockfs);

    /*
     * We need to unset FWLOCK status before we call ufs_quiesce
     * so that the thread doesn't get suspended.  We do this only if
     * this (fallocate) thread requested an unlock operation.
     */
    if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
        if (!ULOCKFS_IS_WLOCK(ulp))
            ULOCKFS_CLR_FWLOCK(ulp);
    }

    /*
     * Quiesce (wait for outstanding accesses to finish)
     */
    if (error = ufs_quiesce(ulp)) {
        /*
         * Interrupted due to signal.  There could still be
         * pending vnops.
         */
        signal = 1;

        /*
         * We do broadcast because lock-status
         * could be reverted to old status.
         */
        cv_broadcast(&ulp->ul_cv);
        goto errout;
    }

    /*
     * If the fallocate thread requested a write fs lock operation
     * then we set fwlock status in the ulp.
     */
    if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
        if (ULOCKFS_IS_WLOCK(ulp))
            ULOCKFS_SET_FWLOCK(ulp);
    }

    /*
     * save error lock status to pass down to reconciliation
     * routines and for later cleanup
     */
    if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
        errlck = UN_ERRLCK;

    if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
        int needs_unlock;
        int needs_sbwrite;

        poll_events |= POLLERR;
        errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs) ?
            RE_ERRLCK : SET_ERRLCK;

        needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
        if (needs_unlock)
            mutex_enter(&ufsvfsp->vfs_lock);

        /* disable delayed i/o */
        needs_sbwrite = 0;

        if (errlck == SET_ERRLCK) {
            ufsvfsp->vfs_fs->fs_clean = FSBAD;
            needs_sbwrite = 1;
        }

        needs_sbwrite |= ufsvfsp->vfs_dio;
        ufsvfsp->vfs_dio = 0;

        if (needs_unlock)
            mutex_exit(&ufsvfsp->vfs_lock);

        if (needs_sbwrite) {
            ulp->ul_sbowner = curthread;
            TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);

            if (needs_unlock)
                mutex_enter(&ufsvfsp->vfs_lock);

            ufsvfsp->vfs_fs->fs_fmod = 0;

            if (needs_unlock)
                mutex_exit(&ufsvfsp->vfs_lock);
        }
    }
    /*
     * reconcile superblock and inodes if the fs was wlocked
     */
    if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
        if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
            goto errout;
        /*
         * in case the fs grew; reset the metadata map for logging tests
         */
        TRANS_MATA_UMOUNT(ufsvfsp);
        TRANS_MATA_MOUNT(ufsvfsp);
        TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
    }

    /*
     * At least everything *currently* dirty goes out.
     */
    if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
        !ULOCKFS_IS_ELOCK(ulp))
        goto errout;

    /*
     * thaw file system and wakeup pended processes
     */
    if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
        if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
            goto errout;

    /*
     * reset modified flag if not already write locked
     */
    if (!LOCKFS_IS_WLOCK(&lfs))
        ULOCKFS_CLR_MOD(ulp);

    /*
     * idle the lock struct
     */
    ULOCKFS_CLR_BUSY(ulp);
    /* update the ioctl copy */
    LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

    /*
     * free current comment
     */
    if (lfs.lf_comment && lfs.lf_comlen != 0) {
        kmem_free(lfs.lf_comment, lfs.lf_comlen);
        lfs.lf_comment = NULL;
        lfs.lf_comlen = 0;
    }

    /* do error lock cleanup */
    if (errlck == UN_ERRLCK)
        ufsfx_unlockfs(ufsvfsp);

    else if (errlck == RE_ERRLCK)
        ufsfx_lockfs(ufsvfsp);

    /* don't allow error lock from user to invoke panic */
    else if (from_user && errlck == SET_ERRLCK &&
        !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
        (void) ufs_fault(ufsvfsp->vfs_root,
            ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
            ulp->ul_lockfs.lf_comment : "user-applied error lock");

    atomic_dec_ulong(&ufs_quiesce_pend);
    mutex_exit(&ulp->ul_lock);
    vfs_unlock(vfsp);

    if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
        poll_events |= POLLERR;

    pollwakeup(&ufs_pollhd, poll_events);

    /*
     * Allow both the delete thread and the reclaim thread to
     * continue.
     */
    ufs_thread_continue(&ufsvfsp->vfs_delete);
    ufs_thread_continue(&ufsvfsp->vfs_reclaim);

    return (0);
errout:
    /*
     * Lock failed.  Reset the old lock in ufsvfs if not hard locked.
     */
    if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
        bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
        ulp->ul_fs_lock = (1 << lfs.lf_lock);
    }

    /*
     * Don't call ufs_thaw() when there's a signal during
     * ufs quiesce operation as it can lead to deadlock
     * with getpage.
     */
    if (signal == 0)
        (void) ufs_thaw(vfsp, ufsvfsp, ulp);

    ULOCKFS_CLR_BUSY(ulp);
    LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

errexit:
    atomic_dec_ulong(&ufs_quiesce_pend);
    mutex_exit(&ulp->ul_lock);
    vfs_unlock(vfsp);

    /*
     * Allow both the delete thread and the reclaim thread to
     * continue.
     */
    ufs_thread_continue(&ufsvfsp->vfs_delete);
    ufs_thread_continue(&ufsvfsp->vfs_reclaim);

    return (error);
}
/*
 * fiolfss
 *	return the current file system locking state info
 */
int
ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
{
    struct ulockfs *ulp;

    if (!vp || !vp->v_vfsp || !VTOI(vp))
        return (EINVAL);

    /* file system has been forcibly unmounted */
    if (VTOI(vp)->i_ufsvfs == NULL)
        return (EIO);

    ulp = VTOUL(vp);

    if (ULOCKFS_IS_HLOCK(ulp)) {
        *lockfsp = ulp->ul_lockfs;	/* structure assignment */
        return (0);
    }

    mutex_enter(&ulp->ul_lock);

    *lockfsp = ulp->ul_lockfs;	/* structure assignment */

    if (ULOCKFS_IS_MOD(ulp))
        lockfsp->lf_flags |= LOCKFS_MOD;

    mutex_exit(&ulp->ul_lock);

    return (0);
}
/*
 * ufs_check_lockfs
 *	check whether a ufs vnode operation conflicts with the file system lock
 */
int
ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
{
    k_sigset_t smask;
    int sig, slock;

    ASSERT(MUTEX_HELD(&ulp->ul_lock));

    while (ulp->ul_fs_lock & mask) {
        slock = (int)ULOCKFS_IS_SLOCK(ulp);
        if ((curthread->t_flag & T_DONTPEND) && !slock) {
            curthread->t_flag |= T_WOULDBLOCK;
            return (EAGAIN);
        }
        curthread->t_flag &= ~T_WOULDBLOCK;

        /*
         * In the case of an onerr umount of the fs, threads could
         * have blocked before coming into ufs_check_lockfs and
         * need to check for the special case of ELOCK and
         * vfs_dontblock being set, which would indicate that the fs
         * is on its way out and will not return, therefore making
         * EIO the appropriate response.
         */
        if (ULOCKFS_IS_HLOCK(ulp) ||
            (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
            return (EIO);

        /*
         * wait for lock status to change
         */
        if (slock || ufsvfsp->vfs_nointr) {
            cv_wait(&ulp->ul_cv, &ulp->ul_lock);
        } else {
            sigintr(&smask, 1);
            sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
            sigunintr(&smask);
            if ((!sig && (ulp->ul_fs_lock & mask)) ||
                ufsvfsp->vfs_dontblock)
                return (EINTR);
        }
    }

    if (mask & ULOCKFS_FWLOCK) {
        atomic_inc_ulong(&ulp->ul_falloc_cnt);
        ULOCKFS_SET_FALLOC(ulp);
    } else {
        atomic_inc_ulong(&ulp->ul_vnops_cnt);
    }

    return (0);
}
/*
 * Check whether we came in via the handcrafted lockfs protocol path.  We
 * can't simply check for T_DONTBLOCK here as one might assume, since that
 * can also falsely catch recursive VOPs going to a different filesystem;
 * instead we check whether we already hold the ulockfs->ul_lock mutex.
 */
static int
ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
{
    return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
}
/*
 * ufs_lockfs_begin - start the lockfs locking protocol
 */
int
ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
    int error;
    int rec_vop;
    ushort_t op_cnt_incremented = 0;
    ulong_t *ctr;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }
    /*
     * First time VOP call
     *
     * Increment the ctr irrespective of the lockfs state.  If the lockfs
     * state is not ULOCKFS_ULOCK, we can decrement it later.  However,
     * before incrementing we need to check if there is a pending quiesce
     * request, because if we have a continuous stream of ufs_lockfs_begin
     * requests pounding on a few cpus then the ufs_quiesce thread might
     * never see the value of zero for ctr - a livelock kind of scenario.
     */
    ctr = (mask & ULOCKFS_FWLOCK) ?
        &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
    if (!ULOCKFS_IS_SLOCK(ulp)) {
        atomic_inc_ulong(ctr);
        op_cnt_incremented++;
    }

    /*
     * If the lockfs state (indicated by ul_fs_lock) is not just
     * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
     * where there is a check with an appropriate mask to selectively allow
     * operations permitted for that kind of lockfs state.
     *
     * Even these selective operations should not be allowed to go through
     * if a lockfs request is in progress, because that could result in
     * inode modifications during a quiesce and could hence result in inode
     * reconciliation failures.  ULOCKFS_SLOCK alone would not be
     * sufficient, so make use of ufs_quiesce_pend to disallow vnode
     * operations when a quiesce is in progress.
     */
    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        if (op_cnt_incremented)
            if (!atomic_dec_ulong_nv(ctr))
                cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        error = ufs_check_lockfs(ufsvfsp, ulp, mask);
        mutex_exit(&ulp->ul_lock);
        if (error) {
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
    } else {
        /*
         * This is the common case of a file system in an unlocked state.
         *
         * If the file system is unlocked, we would expect the ctr to have
         * been incremented by now.  But this will not be true when a
         * quiesce is winding up - SLOCK was set when we checked before
         * incrementing the ctr, but by the time we checked for
         * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone.  It is okay
         * to take ul_lock and go through the slow path in this uncommon
         * case.
         */
        if (op_cnt_incremented == 0) {
            mutex_enter(&ulp->ul_lock);
            error = ufs_check_lockfs(ufsvfsp, ulp, mask);
            if (error) {
                mutex_exit(&ulp->ul_lock);
                if (ulockfs_info_free == NULL)
                    kmem_free(ulockfs_info_temp,
                        sizeof (ulockfs_info_t));
                return (error);
            }
            if (mask & ULOCKFS_FWLOCK)
                ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        } else if (mask & ULOCKFS_FWLOCK) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
/*
 * Check whether we are returning from the top level VOP.
 */
static int
ufs_lockfs_top_vop_return(ulockfs_info_t *head)
{
    ulockfs_info_t *info;
    int result = 1;

    for (info = head; info != NULL; info = info->next) {
        if (info->ulp != NULL) {
            result = 0;
            break;
        }
    }

    return (result);
}
/*
 * ufs_lockfs_end - terminate the lockfs locking protocol
 */
void
ufs_lockfs_end(struct ulockfs *ulp)
{
    ulockfs_info_t *info;
    ulockfs_info_t *head;

    /*
     * end-of-VOP protocol
     */
    if (ulp == NULL)
        return;

    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);

    /*
     * If we're called from a first level VOP, we have to have a
     * valid ulockfs record in the TSD.
     */
    ASSERT(info != NULL);

    /*
     * Invalidate the ulockfs record.
     */
    info->ulp = NULL;

    if (ufs_lockfs_top_vop_return(head))
        curthread->t_flag &= ~T_DONTBLOCK;

    /* fallocate thread */
    if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
        /* Clear the thread's fallocate state */
        info->flags &= ~ULOCK_INFO_FALLOCATE;
        if (!atomic_dec_ulong_nv(&ulp->ul_falloc_cnt)) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_CLR_FALLOC(ulp);
            cv_broadcast(&ulp->ul_cv);
            mutex_exit(&ulp->ul_lock);
        }
    } else { /* normal thread */
        if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
            cv_broadcast(&ulp->ul_cv);
    }
}
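
/*
 * Illustrative sketch (editorial addition, not in the original source):
 * how a typical UFS vnode operation brackets its work with
 * ufs_lockfs_begin()/ufs_lockfs_end().  The function name and body are
 * hypothetical; the mask is one of the per-VOP masks (ULOCKFS_GETREAD_MASK
 * is also used by ufs_lockfs_begin_getpage() below):
 *
 *	static int
 *	ufs_example_vop(struct inode *ip)
 *	{
 *		struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
 *		struct ulockfs *ulp;
 *		int error;
 *
 *		error = ufs_lockfs_begin(ufsvfsp, &ulp,
 *		    ULOCKFS_GETREAD_MASK);
 *		if (error)
 *			return (error);
 *
 *		... the actual work of the operation ...
 *
 *		if (ulp)
 *			ufs_lockfs_end(ulp);
 *		return (error);
 *	}
 *
 * Note that ulp may legitimately be NULL on return from
 * ufs_lockfs_begin() (the recursive-VOP case), so it must be checked
 * before calling ufs_lockfs_end().
 */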
/*
 * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
 * blocking.
 */
int
ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
    int error = 0;
    int rec_vop;
    ushort_t op_cnt_incremented = 0;
    ulong_t *ctr;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }

    /*
     * First time VOP call
     *
     * Increment the ctr irrespective of the lockfs state.  If the lockfs
     * state is not ULOCKFS_ULOCK, we can decrement it later.  However,
     * before incrementing we need to check if there is a pending quiesce
     * request, because if we have a continuous stream of ufs_lockfs_begin
     * requests pounding on a few cpus then the ufs_quiesce thread might
     * never see the value of zero for ctr - a livelock kind of scenario.
     */
    ctr = (mask & ULOCKFS_FWLOCK) ?
        &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
    if (!ULOCKFS_IS_SLOCK(ulp)) {
        atomic_inc_ulong(ctr);
        op_cnt_incremented++;
    }
    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        /*
         * Non-blocking version of the ufs_check_lockfs() code.
         *
         * If the file system is not hard locked or error locked
         * and if ulp->ul_fs_lock allows this operation, increment
         * the appropriate counter and proceed (for example, in case
         * the file system is delete locked, a mmap can still go
         * through).
         */
        if (op_cnt_incremented)
            if (!atomic_dec_ulong_nv(ctr))
                cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        if (ULOCKFS_IS_HLOCK(ulp) ||
            (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
            error = EIO;
        else if (ulp->ul_fs_lock & mask)
            error = EAGAIN;

        if (error) {
            mutex_exit(&ulp->ul_lock);
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
        atomic_inc_ulong(ctr);
        if (mask & ULOCKFS_FWLOCK)
            ULOCKFS_SET_FALLOC(ulp);
        mutex_exit(&ulp->ul_lock);
    } else {
        /*
         * This is the common case of a file system in an unlocked state.
         *
         * If the file system is unlocked, we would expect the ctr to have
         * been incremented by now.  But this will not be true when a
         * quiesce is winding up - SLOCK was set when we checked before
         * incrementing the ctr, but by the time we checked for
         * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone.  Take
         * ul_lock and go through the non-blocking version of the
         * ufs_check_lockfs() code.
         */
        if (op_cnt_incremented == 0) {
            mutex_enter(&ulp->ul_lock);
            if (ULOCKFS_IS_HLOCK(ulp) ||
                (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
                error = EIO;
            else if (ulp->ul_fs_lock & mask)
                error = EAGAIN;

            if (error) {
                mutex_exit(&ulp->ul_lock);
                if (ulockfs_info_free == NULL)
                    kmem_free(ulockfs_info_temp,
                        sizeof (ulockfs_info_t));
                return (error);
            }
            atomic_inc_ulong(ctr);
            if (mask & ULOCKFS_FWLOCK)
                ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        } else if (mask & ULOCKFS_FWLOCK) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
/*
 * specialized version of ufs_lockfs_begin() called by ufs_getpage().
 */
int
ufs_lockfs_begin_getpage(
    struct ufsvfs *ufsvfsp,
    struct ulockfs **ulpp,
    struct seg *seg,
    int read_access,
    uint_t *protp)
{
    ulong_t mask;
    int error;
    int rec_vop;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }

    /*
     * First time VOP call
     */
    atomic_inc_ulong(&ulp->ul_vnops_cnt);
    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
            cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        if (seg->s_ops == &segvn_ops &&
            ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
            mask = (ulong_t)ULOCKFS_GETREAD_MASK;
        } else if (protp && read_access) {
            /*
             * Restrict the mapping to readonly.
             * Writes to this mapping will cause
             * another fault which will then
             * be suspended if fs is write locked
             */
            *protp &= ~PROT_WRITE;
            mask = (ulong_t)ULOCKFS_GETREAD_MASK;
        } else
            mask = (ulong_t)ULOCKFS_GETWRITE_MASK;

        /*
         * will sleep if this fs is locked against this VOP
         */
        error = ufs_check_lockfs(ufsvfsp, ulp, mask);
        mutex_exit(&ulp->ul_lock);
        if (error) {
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
void
ufs_lockfs_tsd_destructor(void *head)
{
    ulockfs_info_t *curr = (ulockfs_info_t *)head;
    ulockfs_info_t *temp;

    for (; curr != NULL; ) {
        /*
         * The TSD destructor is called when the thread exits
         * (via thread_exit()).  By that time the thread must have
         * cleaned up all VOPs via ufs_lockfs_end(), so no valid
         * ulockfs record may still exist while a thread is exiting.
         */
        temp = curr;
        curr = curr->next;
        ASSERT(temp->ulp == NULL);
        kmem_free(temp, sizeof (ulockfs_info_t));
    }
}