/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/atomic.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_panic.h>
#include <sys/fs/ufs_mount.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/fs/ufs_quota.h>
#include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
#include <sys/errno.h>
#include <sys/sysinfo.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
extern struct vnode *common_specvp(struct vnode *vp);
/* error lock status */
#define	UN_ERRLCK	(-1)
#define	SET_ERRLCK	1
#define	RE_ERRLCK	2
#define	NO_ERRLCK	0
/*
 * Index to be used in TSD for storing lockfs data
 */
uint_t ufs_lockfs_key;
typedef struct _ulockfs_info {
    struct _ulockfs_info *next;
    struct ulockfs *ulp;
    uint_t flags;
} ulockfs_info_t;

#define	ULOCK_INFO_FALLOCATE	0x00000001	/* fallocate thread */
/*
 * Check in TSD whether we are already doing any VOP on this filesystem
 */
#define	IS_REC_VOP(found, head, ulp, free)	\
{						\
    ulockfs_info_t *_curr;			\
						\
    for (found = 0, free = NULL, _curr = head; \
        _curr != NULL; _curr = _curr->next) {	\
        if ((free == NULL) &&			\
            (_curr->ulp == NULL))		\
            free = _curr;			\
        if (_curr->ulp == ulp) {		\
            found = 1;				\
            break;				\
        }					\
    }						\
}
/*
 * Get the lockfs data from TSD so that lockfs handles the recursive VOP
 * properly
 */
#define	SEARCH_ULOCKFSP(head, ulp, info)	\
{						\
    ulockfs_info_t *_curr;			\
						\
    for (_curr = head; _curr != NULL;		\
        _curr = _curr->next) {			\
        if (_curr->ulp == ulp) {		\
            break;				\
        }					\
    }						\
						\
    info = _curr;				\
}
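
/*
 * Illustrative sketch (editorial addition, not from the original source).
 * Each thread keeps a singly linked list of ulockfs_info_t records in TSD
 * under ufs_lockfs_key, one per file system on which it currently has a
 * vnode operation outstanding.  A typical caller of the macros above looks
 * like:
 *
 *	ulockfs_info_t *head, *free_slot;
 *	int rec_vop;
 *
 *	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
 *	IS_REC_VOP(rec_vop, head, ulp, free_slot);
 *	if (rec_vop)
 *		return (0);	... recursive VOP; skip the lockfs protocol
 *
 * Records whose ulp field is NULL are finished entries; IS_REC_VOP returns
 * the first such record through "free" so it can be reused instead of
 * allocating a new one.
 */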
/*
 * ufs_getlfd
 *	Validate lockfs request
 */
static int
ufs_getlfd(
    struct lockfs *lockfsp,		/* new lock request */
    struct lockfs *ul_lockfsp)		/* old lock state */
{
    int error = 0;

    /*
     * no input flags defined
     */
    if (lockfsp->lf_flags != 0) {
        error = EINVAL;
        goto errout;
    }

    /*
     * check key
     */
    if (!LOCKFS_IS_ULOCK(ul_lockfsp))
        if (lockfsp->lf_key != ul_lockfsp->lf_key) {
            error = EINVAL;
            goto errout;
        }

    lockfsp->lf_key = ul_lockfsp->lf_key + 1;

errout:
    return (error);
}
/*
 * ufs_checkaccton
 *	check if accounting is turned on on this fs
 */
int
ufs_checkaccton(struct vnode *vp)
{
    if (acct_fs_in_use(vp))
        return (EDEADLK);
    return (0);
}
/*
 * ufs_checkswapon
 *	check if local swapping is to a file on this fs
 */
int
ufs_checkswapon(struct vnode *vp)
{
    struct swapinfo *sip;

    mutex_enter(&swapinfo_lock);
    for (sip = swapinfo; sip; sip = sip->si_next)
        if (sip->si_vp->v_vfsp == vp->v_vfsp) {
            mutex_exit(&swapinfo_lock);
            return (EDEADLK);
        }
    mutex_exit(&swapinfo_lock);
    return (0);
}
/*
 * ufs_freeze
 *	pend future accesses for current lock and desired lock
 */
void
ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
{
    /*
     * set to new lock type
     */
    ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
    ulp->ul_lockfs.lf_key = lockfsp->lf_key;
    ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
    ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;

    ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
}
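
/*
 * Illustrative example (editorial addition, not from the original source).
 * lf_lock holds a small integer lock type from sys/lockfs.h (LOCKFS_ULOCK,
 * LOCKFS_WLOCK, LOCKFS_HLOCK, ...), while ul_fs_lock holds the matching
 * one-hot bit, so conflict tests reduce to a single mask operation:
 *
 *	ulp->ul_fs_lock = (1 << lockfsp->lf_lock);
 *	...
 *	if (ulp->ul_fs_lock & mask)	... does this VOP conflict?
 *
 * ufs_check_lockfs() below relies on exactly this representation.
 */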
/*
 * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
 * starting the ufs_quiesce() protocol and decrement it only when a file
 * system no longer has to be in a quiescent state.  This allows ufs_pageio()
 * to detect that another thread wants to quiesce a file system.  See more
 * comments in ufs_pageio().
 */
ulong_t ufs_quiesce_pend = 0;
/*
 * ufs_quiesce
 *	wait for outstanding accesses to finish
 */
int
ufs_quiesce(struct ulockfs *ulp)
{
    int error = 0;
    ulockfs_info_t *head;
    ulockfs_info_t *info;
    klwp_t *lwp = ttolwp(curthread);

    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);

    /*
     * We have to keep /proc away from stopping us after we applied
     * the softlock but before we got a chance to clear it again.
     * prstop() may pagefault and become stuck on the softlock still
     * pending.
     */
    if (lwp != NULL)
        lwp->lwp_nostop++;

    /*
     * Set a softlock to suspend future ufs_vnops so that
     * this lockfs request will not be starved
     */
    ULOCKFS_SET_SLOCK(ulp);
    ASSERT(ufs_quiesce_pend);

    /* check if there are any outstanding ufs vnodeops calls */
    while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) {
        /*
         * use timed version of cv_wait_sig() to make sure we don't
         * miss a wake up call from ufs_pageio() when it doesn't use
         * ul_lock.
         *
         * when a fallocate thread comes in, the only way it returns
         * from this function is if there are no other vnode operations
         * going on (remember fallocate threads are tracked using
         * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread
         * hasn't already grabbed the fs write lock.
         */
        if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
            if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp))
                goto out;
        }
        if (!cv_reltimedwait_sig(&ulp->ul_cv, &ulp->ul_lock, hz,
            TR_CLOCK_TICK)) {
            error = EINTR;
            goto out;
        }
    }

out:
    /*
     * unlock the soft lock
     */
    ULOCKFS_CLR_SLOCK(ulp);

    if (lwp != NULL)
        lwp->lwp_nostop--;

    return (error);
}
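
/*
 * A minimal sketch (editorial addition, not from the original source) of
 * the caller-side protocol around ufs_quiesce() described above.  This
 * mirrors what ufs__fiolfs() does later in this file:
 *
 *	atomic_inc_ulong(&ufs_quiesce_pend);	... advertise pending quiesce
 *	mutex_enter(&ulp->ul_lock);
 *	...
 *	error = ufs_quiesce(ulp);	... drain ul_vnops_cnt/ul_falloc_cnt
 *	...
 *	atomic_dec_ulong(&ufs_quiesce_pend);	... quiescent state over
 *	mutex_exit(&ulp->ul_lock);
 */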
/*
 * ufs_flush_inode
 */
int
ufs_flush_inode(struct inode *ip, void *arg)
{
    int error;
    int saverror = 0;

    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * asynchronously push all the dirty pages
     */
    if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
        (error != EAGAIN))
        saverror = error;
    /*
     * wait for io and discard all mappings
     */
    if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
        saverror = error;

    if (ITOV(ip)->v_type == VDIR) {
        dnlc_dir_purge(&ip->i_danchor);
    }

    return (saverror);
}
/*
 * ufs_flush
 *	Flush everything that is currently dirty; this includes invalidating
 *	any mappings.
 */
int
ufs_flush(struct vfs *vfsp)
{
    int error;
    int saverror = 0;
    struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    struct fs *fs = ufsvfsp->vfs_fs;
    int tdontblock = 0;

    ASSERT(vfs_lock_held(vfsp));

    /*
     * purge dnlc
     */
    (void) dnlc_purge_vfsp(vfsp, 0);

    /*
     * drain the delete and idle threads
     */
    ufs_delete_drain(vfsp, 0, 0);
    ufs_idle_drain(vfsp);

    /*
     * flush and invalidate quota records
     */
    (void) qsync(ufsvfsp);

    /*
     * flush w/invalidate the inodes for vfsp
     */
    if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
        saverror = error;

    /*
     * synchronously flush superblock and summary info
     */
    if (fs->fs_ronly == 0 && fs->fs_fmod) {
        fs->fs_fmod = 0;
        TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
    }

    /*
     * flush w/invalidate block device pages and buf cache
     */
    if ((error = fop_putpage(common_specvp(ufsvfsp->vfs_devvp),
        (offset_t)0, 0, B_INVAL, CRED(), NULL)) > 0)
        saverror = error;

    (void) bflush((dev_t)vfsp->vfs_dev);
    (void) bfinval((dev_t)vfsp->vfs_dev, 0);

    /*
     * drain the delete and idle threads again
     */
    ufs_delete_drain(vfsp, 0, 0);
    ufs_idle_drain(vfsp);

    /*
     * play with the clean flag
     */
    if (saverror == 0)
        ufs_checkclean(vfsp);

    /*
     * Flush any outstanding transactions and roll the log, but only if
     * we are supposed to, i.e. LDL_NOROLL is not set.  We cannot simply
     * check fs_ronly here, since fsck may also use this code to roll the
     * log on a read-only file system (e.g. root during early stages of
     * boot); if it does more than a sanity check, it clears LDL_NOROLL
     * beforehand.  In addition we assert that the deltamap does not
     * contain any deltas in case LDL_NOROLL is set, since that is not
     * supposed to happen.
     */
    if (TRANS_ISTRANS(ufsvfsp)) {
        ml_unit_t *ul = ufsvfsp->vfs_log;
        mt_map_t *mtm = ul->un_deltamap;

        if (ul->un_flags & LDL_NOROLL) {
            ASSERT(mtm->mtm_nme == 0);
        } else {
            /*
             * Do not set T_DONTBLOCK if there is a
             * transaction opened by caller.
             */
            if (curthread->t_flag & T_DONTBLOCK)
                tdontblock = 1;
            else
                curthread->t_flag |= T_DONTBLOCK;

            TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH,
                TOP_COMMIT_SIZE, &error);
            if (!error) {
                TRANS_END_SYNC(ufsvfsp, &saverror,
                    TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE);
            }

            if (tdontblock == 0)
                curthread->t_flag &= ~T_DONTBLOCK;

            logmap_roll_dev(ufsvfsp->vfs_log);
        }
    }

    return (saverror);
}
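
/*
 * Illustrative sketch (editorial addition, not from the original source).
 * Because ufs_flush() asserts vfs_lock_held(vfsp), callers bracket it with
 * the vfs lock, as ufs__fiolfs() does below:
 *
 *	vfs_lock_wait(vfsp);
 *	error = ufs_flush(vfsp);
 *	vfs_unlock(vfsp);
 */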
/*
 * ufs_thaw_wlock
 *	special processing when thawing down to wlock
 */
static int
ufs_thaw_wlock(struct inode *ip, void *arg)
{
    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * iupdat refuses to clear flags if the fs is read only.  The fs
     * may become read/write during the lock and we wouldn't want
     * these inodes being written to disk.  So clear the flags.
     */
    rw_enter(&ip->i_contents, RW_WRITER);
    ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
    rw_exit(&ip->i_contents);

    /*
     * pages are mlocked -- fail wlock
     */
    if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
        return (EBUSY);

    return (0);
}
/*
 * ufs_thaw_hlock
 *	special processing when thawing down to hlock or elock
 */
static int
ufs_thaw_hlock(struct inode *ip, void *arg)
{
    struct vnode *vp = ITOV(ip);

    /*
     * wrong file system; keep looking
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    /*
     * blow away all pages - even if they are mlocked
     */
    do {
        (void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
    } while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
    rw_enter(&ip->i_contents, RW_WRITER);
    ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
    rw_exit(&ip->i_contents);

    return (0);
}
/*
 * ufs_thaw
 *	thaw file system lock down to current value
 */
int
ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
{
    int error = 0;
    int noidel = (int)(ulp->ul_flag & ULOCKFS_NOIDEL);

    /*
     * if wlock or hlock or elock
     */
    if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
        ULOCKFS_IS_ELOCK(ulp)) {

        /*
         * don't keep access times
         * don't free deleted files
         * if superblock writes are allowed, limit them to me for now
         */
        ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
        if (ulp->ul_sbowner != (kthread_id_t)-1)
            ulp->ul_sbowner = curthread;

        /*
         * wait for writes for deleted files and superblock updates
         */
        (void) ufs_flush(vfsp);

        /*
         * now make sure the quota file is up-to-date
         *	expensive; but effective
         */
        error = ufs_flush(vfsp);

        /*
         * no one can write the superblock
         */
        ulp->ul_sbowner = (kthread_id_t)-1;

        /*
         * special processing for wlock/hlock/elock
         */
        if (ULOCKFS_IS_WLOCK(ulp)) {
            if (error)
                goto errout;
            error = bfinval(ufsvfsp->vfs_dev, 0);
            if (error)
                goto errout;
            error = ufs_scan_inodes(0, ufs_thaw_wlock,
                (void *)ufsvfsp, ufsvfsp);
            if (error)
                goto errout;
        }
        if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
            error = 0;
            (void) ufs_scan_inodes(0, ufs_thaw_hlock,
                (void *)ufsvfsp, ufsvfsp);
            (void) bfinval(ufsvfsp->vfs_dev, 1);
        }
    } else {

        /*
         * okay to keep access times
         * okay to free deleted files
         * okay to write the superblock
         */
        ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
        ulp->ul_sbowner = NULL;

        /*
         * flush in case deleted files are in memory
         */
        if (noidel) {
            if (error = ufs_flush(vfsp))
                goto errout;
        }
    }

errout:
    cv_broadcast(&ulp->ul_cv);
    return (error);
}
/*
 * ufs_reconcile_fs
 *	reconcile incore superblock with ondisk superblock
 */
int
ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
{
    struct fs *mfs;	/* in-memory superblock */
    struct fs *dfs;	/* on-disk superblock */
    struct buf *bp;	/* on-disk superblock buf */
    int needs_unlock;
    char finished_fsclean;

    mfs = ufsvfsp->vfs_fs;

    /*
     * get the on-disk copy of the superblock
     */
    bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
    bp->b_flags |= (B_STALE|B_AGE);
    if (bp->b_flags & B_ERROR) {
        brelse(bp);
        return (EIO);
    }
    dfs = bp->b_un.b_fs;

    /* error locks may only unlock after the fs has been made consistent */
    if (errlck == UN_ERRLCK) {
        if (dfs->fs_clean == FSFIX) {	/* being repaired */
            brelse(bp);
            return (EAGAIN);
        }
        /* repair not yet started? */
        finished_fsclean = TRANS_ISTRANS(ufsvfsp) ? FSLOG : FSCLEAN;
        if (dfs->fs_clean != finished_fsclean) {
            brelse(bp);
            return (EBUSY);
        }
    }
    /*
     * if superblock has changed too much, abort
     */
    if ((mfs->fs_sblkno != dfs->fs_sblkno) ||
        (mfs->fs_cblkno != dfs->fs_cblkno) ||
        (mfs->fs_iblkno != dfs->fs_iblkno) ||
        (mfs->fs_dblkno != dfs->fs_dblkno) ||
        (mfs->fs_cgoffset != dfs->fs_cgoffset) ||
        (mfs->fs_cgmask != dfs->fs_cgmask) ||
        (mfs->fs_bsize != dfs->fs_bsize) ||
        (mfs->fs_fsize != dfs->fs_fsize) ||
        (mfs->fs_frag != dfs->fs_frag) ||
        (mfs->fs_bmask != dfs->fs_bmask) ||
        (mfs->fs_fmask != dfs->fs_fmask) ||
        (mfs->fs_bshift != dfs->fs_bshift) ||
        (mfs->fs_fshift != dfs->fs_fshift) ||
        (mfs->fs_fragshift != dfs->fs_fragshift) ||
        (mfs->fs_fsbtodb != dfs->fs_fsbtodb) ||
        (mfs->fs_sbsize != dfs->fs_sbsize) ||
        (mfs->fs_nindir != dfs->fs_nindir) ||
        (mfs->fs_nspf != dfs->fs_nspf) ||
        (mfs->fs_trackskew != dfs->fs_trackskew) ||
        (mfs->fs_cgsize != dfs->fs_cgsize) ||
        (mfs->fs_ntrak != dfs->fs_ntrak) ||
        (mfs->fs_nsect != dfs->fs_nsect) ||
        (mfs->fs_spc != dfs->fs_spc) ||
        (mfs->fs_cpg != dfs->fs_cpg) ||
        (mfs->fs_ipg != dfs->fs_ipg) ||
        (mfs->fs_fpg != dfs->fs_fpg) ||
        (mfs->fs_postblformat != dfs->fs_postblformat) ||
        (mfs->fs_magic != dfs->fs_magic)) {
        brelse(bp);
        return (EACCES);
    }
    if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
        if (mfs->fs_clean == FSLOG) {
            brelse(bp);
            return (EBUSY);
        }
    /*
     * get new summary info
     */
    if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
        brelse(bp);
        return (EIO);
    }

    /*
     * release old summary info and update in-memory superblock
     */
    kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
    mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */

    /*
     * update fields allowed to change
     */
    mfs->fs_size = dfs->fs_size;
    mfs->fs_dsize = dfs->fs_dsize;
    mfs->fs_ncg = dfs->fs_ncg;
    mfs->fs_minfree = dfs->fs_minfree;
    mfs->fs_rotdelay = dfs->fs_rotdelay;
    mfs->fs_rps = dfs->fs_rps;
    mfs->fs_maxcontig = dfs->fs_maxcontig;
    mfs->fs_maxbpg = dfs->fs_maxbpg;
    mfs->fs_csmask = dfs->fs_csmask;
    mfs->fs_csshift = dfs->fs_csshift;
    mfs->fs_optim = dfs->fs_optim;
    mfs->fs_csaddr = dfs->fs_csaddr;
    mfs->fs_cssize = dfs->fs_cssize;
    mfs->fs_ncyl = dfs->fs_ncyl;
    mfs->fs_cstotal = dfs->fs_cstotal;
    mfs->fs_reclaim = dfs->fs_reclaim;

    if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
        mfs->fs_reclaim &= ~FS_RECLAIM;
        mfs->fs_reclaim |= FS_RECLAIMING;
        ufs_thread_start(&ufsvfsp->vfs_reclaim,
            ufs_thread_reclaim, vfsp);
    }

    /* XXX What to do about sparecon? */

    /* XXX need to copy volume label */

    /*
     * ondisk clean flag overrides inmemory clean flag iff == FSBAD
     * or if error-locked and ondisk is now clean
     */
    needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
    if (needs_unlock)
        mutex_enter(&ufsvfsp->vfs_lock);

    if (errlck == UN_ERRLCK) {
        if (finished_fsclean == dfs->fs_clean)
            mfs->fs_clean = finished_fsclean;
        else
            mfs->fs_clean = FSBAD;
        mfs->fs_state = FSOKAY - dfs->fs_time;
    }

    if (FSOKAY != dfs->fs_state + dfs->fs_time ||
        (dfs->fs_clean == FSBAD))
        mfs->fs_clean = FSBAD;

    if (needs_unlock)
        mutex_exit(&ufsvfsp->vfs_lock);

    brelse(bp);

    return (0);
}
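
/*
 * A worked example (editorial addition, not from the original source) of
 * the fs_state/fs_time arithmetic used above.  A trusted superblock
 * maintains the invariant fs_state + fs_time == FSOKAY, so fs_state is
 * rewritten as FSOKAY - fs_time whenever fs_time is stamped:
 *
 *	dfs->fs_state = FSOKAY - dfs->fs_time;		... valid pairing
 *	...
 *	if (FSOKAY != dfs->fs_state + dfs->fs_time)	... stale superblock
 *		mfs->fs_clean = FSBAD;
 */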
/*
 * ufs_reconcile_inode
 *	reconcile ondisk inode with incore inode
 */
static int
ufs_reconcile_inode(struct inode *ip, void *arg)
{
    int i;
    int ndaddr;
    int niaddr;
    struct dinode *dp;		/* ondisk inode */
    struct buf *bp = NULL;
    uid_t d_uid;
    gid_t d_gid;
    int error = 0;
    struct fs *fs;

    /*
     * not an inode we care about
     */
    if (ip->i_ufsvfs != (struct ufsvfs *)arg)
        return (0);

    fs = ip->i_fs;

    /*
     * Inode reconciliation fails: we made the filesystem quiescent
     * and we did a ufs_flush() before calling ufs_reconcile_inode()
     * and thus the inode should not have been changed in between.
     * Any discrepancies indicate a logic error and a pretty
     * significant run-state inconsistency we should complain about.
     */
    if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
        cmn_err(CE_WARN, "%s: Inode reconciliation failed for "
            "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
        return (EINVAL);
    }

    /*
     * get the dinode
     */
    bp = UFS_BREAD(ip->i_ufsvfs,
        ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
        (int)fs->fs_bsize);
    if (bp->b_flags & B_ERROR) {
        brelse(bp);
        return (EIO);
    }
    dp = bp->b_un.b_dino;
    dp += itoo(fs, ip->i_number);

    /*
     * handle Sun's implementation of EFT
     */
    d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
    d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;

    rw_enter(&ip->i_contents, RW_WRITER);

    /*
     * some fields are not allowed to change
     */
    if ((ip->i_mode != dp->di_mode) ||
        (ip->i_gen != dp->di_gen) ||
        (ip->i_uid != d_uid) ||
        (ip->i_gid != d_gid)) {
        error = EACCES;
        goto out;
    }

    /*
     * and some are allowed to change
     */
    ip->i_size = dp->di_size;
    ip->i_ic.ic_flags = dp->di_ic.ic_flags;
    ip->i_blocks = dp->di_blocks;
    ip->i_nlink = dp->di_nlink;
    if (ip->i_flag & IFASTSYMLNK) {
        ndaddr = 1;
        niaddr = 0;
    } else {
        ndaddr = NDADDR;
        niaddr = NIADDR;
    }
    for (i = 0; i < ndaddr; ++i)
        ip->i_db[i] = dp->di_db[i];
    for (i = 0; i < niaddr; ++i)
        ip->i_ib[i] = dp->di_ib[i];

out:
    rw_exit(&ip->i_contents);
    brelse(bp);
    return (error);
}
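
/*
 * Illustrative example (editorial addition, not from the original source)
 * of the EFT handling above.  The on-disk dinode carries short
 * compatibility fields di_suid/di_sgid; when an id does not fit, the short
 * field holds the sentinel UID_LONG/GID_LONG and the full 32-bit id lives
 * in di_uid/di_gid:
 *
 *	uid 100   => di_suid == 100       => d_uid = (uid_t)dp->di_suid
 *	uid 70000 => di_suid == UID_LONG  => d_uid = dp->di_uid
 */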
/*
 * ufs_reconcile
 *	reconcile ondisk superblock/inodes with any incore
 */
static int
ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
{
    int error = 0;

    /*
     * get rid of as much inmemory data as possible
     */
    (void) ufs_flush(vfsp);

    /*
     * reconcile the superblock and inodes
     */
    if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
        return (error);
    if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
        return (error);

    /*
     * allocation blocks may be incorrect; get rid of them
     */
    (void) ufs_flush(vfsp);

    return (error);
}
/*
 * File system locking
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
    return (ufs__fiolfs(vp, lockfsp, /* from_user */ 1, from_log));
}
/* kernel-internal interface, also used by fix-on-panic */
int
ufs__fiolfs(
    struct vnode *vp,
    struct lockfs *lockfsp,
    int from_user,
    int from_log)
{
    struct ulockfs *ulp;
    struct lockfs lfs;
    int error;
    struct vfs *vfsp;
    struct ufsvfs *ufsvfsp;
    int errlck = NO_ERRLCK;
    int poll_events = POLLPRI;
    extern struct pollhead ufs_pollhd;
    ulockfs_info_t *head;
    ulockfs_info_t *info;
    int signal = 0;

    /* check valid lock type */
    if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
        return (EINVAL);

    if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
        return (EIO);

    vfsp = vp->v_vfsp;

    if (vfsp->vfs_flag & VFS_UNMOUNTED) /* has been unmounted */
        return (EIO);

    /* take the lock and check again */
    vfs_lock_wait(vfsp);
    if (vfsp->vfs_flag & VFS_UNMOUNTED) {
        vfs_unlock(vfsp);
        return (EIO);
    }

    /*
     * Can't wlock or ro/elock fs with accounting or local swap file.
     * We need to check for this before we grab the ul_lock to avoid
     * deadlocks with the accounting framework.
     */
    if ((LOCKFS_IS_WLOCK(lockfsp) || LOCKFS_IS_ELOCK(lockfsp) ||
        LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
        if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
            vfs_unlock(vfsp);
            return (EDEADLK);
        }
    }

    ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
    ulp = &ufsvfsp->vfs_ulockfs;
    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);
    /*
     * Suspend both the reclaim thread and the delete thread.
     * This must be done outside the lockfs locking protocol.
     */
    ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
    ufs_thread_suspend(&ufsvfsp->vfs_delete);

    mutex_enter(&ulp->ul_lock);
    atomic_inc_ulong(&ufs_quiesce_pend);

    /*
     * Quit if there is another lockfs request in progress
     * that is waiting for existing ufs_vnops to complete.
     */
    if (ULOCKFS_IS_BUSY(ulp)) {
        error = EBUSY;
        goto errexit;
    }

    /* cannot unlock or downgrade a hard-lock */
    if (ULOCKFS_IS_HLOCK(ulp)) {
        error = EIO;
        goto errexit;
    }

    /* an error lock may be unlocked or relocked, only */
    if (ULOCKFS_IS_ELOCK(ulp)) {
        if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
            error = EBUSY;
            goto errexit;
        }
    }

    /*
     * a read-only error lock may only be upgraded to an
     * error lock or hard lock
     */
    if (ULOCKFS_IS_ROELOCK(ulp)) {
        if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
            error = EBUSY;
            goto errexit;
        }
    }

    /*
     * until read-only error locks are fully implemented
     * just return EINVAL
     */
    if (LOCKFS_IS_ROELOCK(lockfsp)) {
        error = EINVAL;
        goto errexit;
    }

    /*
     * an error lock may only be applied if the file system is
     * unlocked or already error locked.
     * (this is to prevent the case where a fs gets changed out from
     * underneath a fs that is locked for backup,
     * that is, name/delete/write-locked.)
     */
    if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
        !ULOCKFS_IS_ROELOCK(ulp)) &&
        (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
        error = EBUSY;
        goto errexit;
    }
    /* get and validate the input lockfs request */
    if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
        goto errexit;

    /*
     * save current ulockfs struct
     */
    bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));

    /*
     * Freeze the file system (pend future accesses)
     */
    ufs_freeze(ulp, lockfsp);

    /*
     * Set locking in progress because ufs_quiesce may free the
     * ul_lock mutex.
     */
    ULOCKFS_SET_BUSY(ulp);
    /* update the ioctl copy */
    LOCKFS_SET_BUSY(&ulp->ul_lockfs);

    /*
     * We need to unset FWLOCK status before we call ufs_quiesce
     * so that the thread doesn't get suspended.  We do this only if
     * this (fallocate) thread requested an unlock operation.
     */
    if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
        if (!ULOCKFS_IS_WLOCK(ulp))
            ULOCKFS_CLR_FWLOCK(ulp);
    }

    /*
     * Quiesce (wait for outstanding accesses to finish)
     */
    if (error = ufs_quiesce(ulp)) {
        /*
         * Interrupted due to signal. There could still be
         * pending vnops.
         */
        signal = 1;

        /*
         * We do broadcast because lock-status
         * could be reverted to old status.
         */
        cv_broadcast(&ulp->ul_cv);
        goto errout;
    }

    /*
     * If the fallocate thread requested a write fs lock operation
     * then we set fwlock status in the ulp.
     */
    if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
        if (ULOCKFS_IS_WLOCK(ulp))
            ULOCKFS_SET_FWLOCK(ulp);
    }

    /*
     * save error lock status to pass down to reconciliation
     * routines and for later cleanup
     */
    if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
        errlck = UN_ERRLCK;

    if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
        int needs_unlock = 0;
        int needs_sbwrite = 0;

        poll_events |= POLLERR;
        errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs) ?
            RE_ERRLCK : SET_ERRLCK;

        needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
        if (needs_unlock)
            mutex_enter(&ufsvfsp->vfs_lock);

        /* disable delayed i/o */
        needs_sbwrite = 0;

        if (errlck == SET_ERRLCK) {
            ufsvfsp->vfs_fs->fs_clean = FSBAD;
            needs_sbwrite = 1;
        }

        needs_sbwrite |= ufsvfsp->vfs_dio;
        ufsvfsp->vfs_dio = 0;

        if (needs_unlock)
            mutex_exit(&ufsvfsp->vfs_lock);

        if (needs_sbwrite) {
            ulp->ul_sbowner = curthread;
            TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);

            if (needs_unlock)
                mutex_enter(&ufsvfsp->vfs_lock);

            ufsvfsp->vfs_fs->fs_fmod = 0;

            if (needs_unlock)
                mutex_exit(&ufsvfsp->vfs_lock);
        }
    }
    /*
     * reconcile superblock and inodes if was wlocked
     */
    if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
        if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
            goto errout;
        /*
         * in case the fs grew; reset the metadata map for logging tests
         */
        TRANS_MATA_UMOUNT(ufsvfsp);
        TRANS_MATA_MOUNT(ufsvfsp);
        TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
    }

    /*
     * At least everything *currently* dirty goes out.
     */
    if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
        !ULOCKFS_IS_ELOCK(ulp))
        goto errout;

    /*
     * thaw file system and wakeup pended processes
     */
    if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
        if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
            goto errout;

    /*
     * reset modified flag if not already write locked
     */
    if (!LOCKFS_IS_WLOCK(&lfs))
        ULOCKFS_CLR_MOD(ulp);

    /*
     * idle the lock struct
     */
    ULOCKFS_CLR_BUSY(ulp);
    /* update the ioctl copy */
    LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

    /*
     * free current comment
     */
    if (lfs.lf_comment && lfs.lf_comlen != 0) {
        kmem_free(lfs.lf_comment, lfs.lf_comlen);
        lfs.lf_comment = NULL;
        lfs.lf_comlen = 0;
    }
    /* do error lock cleanup */
    if (errlck == UN_ERRLCK)
        ufsfx_unlockfs(ufsvfsp);

    else if (errlck == RE_ERRLCK)
        ufsfx_lockfs(ufsvfsp);

    /* don't allow error lock from user to invoke panic */
    else if (from_user && errlck == SET_ERRLCK &&
        !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
        (void) ufs_fault(ufsvfsp->vfs_root,
            ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
            ulp->ul_lockfs.lf_comment : "user-applied error lock");

    atomic_dec_ulong(&ufs_quiesce_pend);
    mutex_exit(&ulp->ul_lock);
    vfs_unlock(vfsp);

    if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
        poll_events |= POLLERR;

    pollwakeup(&ufs_pollhd, poll_events);

    /*
     * Allow both the delete thread and the reclaim thread to
     * continue.
     */
    ufs_thread_continue(&ufsvfsp->vfs_delete);
    ufs_thread_continue(&ufsvfsp->vfs_reclaim);

    return (0);
errout:
    /*
     * Lock failed. Reset the old lock in ufsvfs if not hard locked.
     */
    if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
        bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
        ulp->ul_fs_lock = (1 << lfs.lf_lock);
    }

    /*
     * Don't call ufs_thaw() when there's a signal during
     * ufs quiesce operation as it can lead to deadlock
     * with getpage.
     */
    if (signal == 0)
        (void) ufs_thaw(vfsp, ufsvfsp, ulp);

    ULOCKFS_CLR_BUSY(ulp);
    LOCKFS_CLR_BUSY(&ulp->ul_lockfs);

errexit:
    atomic_dec_ulong(&ufs_quiesce_pend);
    mutex_exit(&ulp->ul_lock);
    vfs_unlock(vfsp);

    /*
     * Allow both the delete thread and the reclaim thread to
     * continue.
     */
    ufs_thread_continue(&ufsvfsp->vfs_delete);
    ufs_thread_continue(&ufsvfsp->vfs_reclaim);

    return (error);
}
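
/*
 * A hedged usage sketch (editorial addition, not from the original source).
 * ufs_fiolfs() is reached via the _FIOLFS ioctl (see sys/filio.h); a
 * user-level caller in the style of lockfs(1M) would do roughly:
 *
 *	struct lockfs lf;
 *
 *	(void) memset(&lf, 0, sizeof (lf));
 *	lf.lf_lock = LOCKFS_WLOCK;		... request a write lock
 *	if (ioctl(fd, _FIOLFS, &lf) == -1)
 *		perror("_FIOLFS");
 */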
/*
 * ufs_fiolfss
 *	return the current file system locking state info
 */
int
ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
{
    struct ulockfs *ulp;

    if (!vp || !vp->v_vfsp || !VTOI(vp))
        return (EINVAL);

    /* file system has been forcibly unmounted */
    if (VTOI(vp)->i_ufsvfs == NULL)
        return (EIO);

    ulp = VTOUL(vp);

    if (ULOCKFS_IS_HLOCK(ulp)) {
        *lockfsp = ulp->ul_lockfs;	/* structure assignment */
        return (0);
    }

    mutex_enter(&ulp->ul_lock);

    *lockfsp = ulp->ul_lockfs;	/* structure assignment */

    if (ULOCKFS_IS_MOD(ulp))
        lockfsp->lf_flags |= LOCKFS_MOD;

    mutex_exit(&ulp->ul_lock);

    return (0);
}
/*
 * ufs_check_lockfs
 *	check whether a ufs_vnops conflicts with the file system lock
 */
int
ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
{
    int sig;
    int slock;

    ASSERT(MUTEX_HELD(&ulp->ul_lock));

    while (ulp->ul_fs_lock & mask) {
        slock = (int)ULOCKFS_IS_SLOCK(ulp);
        if ((curthread->t_flag & T_DONTPEND) && !slock) {
            curthread->t_flag |= T_WOULDBLOCK;
            return (EAGAIN);
        }
        curthread->t_flag &= ~T_WOULDBLOCK;

        /*
         * In the case of an onerr umount of the fs, threads could
         * have blocked before coming into ufs_check_lockfs and
         * need to check for the special case of ELOCK and
         * vfs_dontblock being set, which would indicate that the fs
         * is on its way out and will not return, therefore making
         * EIO the appropriate response.
         */
        if (ULOCKFS_IS_HLOCK(ulp) ||
            (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
            return (EIO);

        /*
         * wait for lock status to change
         */
        if (slock || ufsvfsp->vfs_nointr) {
            cv_wait(&ulp->ul_cv, &ulp->ul_lock);
        } else {
            sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);

            if ((!sig && (ulp->ul_fs_lock & mask)) ||
                ufsvfsp->vfs_dontblock)
                return (EINTR);
        }
    }

    if (mask & ULOCKFS_FWLOCK) {
        atomic_inc_ulong(&ulp->ul_falloc_cnt);
        ULOCKFS_SET_FALLOC(ulp);
    } else {
        atomic_inc_ulong(&ulp->ul_vnops_cnt);
    }

    return (0);
}
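
/*
 * Illustrative example (editorial addition, not from the original source).
 * Each vnode operation passes a mask naming the lock states that conflict
 * with it (for example, ufs_lockfs_begin_getpage() below chooses between
 * ULOCKFS_GETREAD_MASK and ULOCKFS_GETWRITE_MASK).  The loop above then
 * reduces the policy to:
 *
 *	while (ulp->ul_fs_lock & mask)	... a conflicting lock is held
 *		...wait, or fail with EAGAIN/EIO/EINTR...
 *
 * so a write-locked file system stalls write faults while read faults can
 * keep flowing.
 */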
/*
 * Check whether we came across the handcrafted lockfs protocol path.  We
 * can't simply check for T_DONTBLOCK here, as one might assume, since that
 * can also falsely catch recursive VOPs going to a different filesystem;
 * instead we check whether we already hold the ulockfs->ul_lock mutex.
 */
static int
ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
{
    return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
}
/*
 * ufs_lockfs_begin - start the lockfs locking protocol
 */
int
ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
    int error = 0;
    int rec_vop;
    ushort_t op_cnt_incremented = 0;
    ulong_t *ctr;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }

    /*
     * First time VOP call
     *
     * Increment the ctr irrespective of the lockfs state. If the lockfs
     * state is not ULOCKFS_ULOCK, we can decrement it later. However,
     * before incrementing we need to check if there is a pending quiesce
     * request because if we have a continuous stream of ufs_lockfs_begin
     * requests pounding on a few cpu's then the ufs_quiesce thread might
     * never see the value of zero for ctr - a livelock kind of scenario.
     */
    ctr = (mask & ULOCKFS_FWLOCK) ?
        &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
    if (!ULOCKFS_IS_SLOCK(ulp)) {
        atomic_inc_ulong(ctr);
        op_cnt_incremented++;
    }

    /*
     * If the lockfs state (indicated by ul_fs_lock) is not just
     * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
     * where there is a check with an appropriate mask to selectively allow
     * operations permitted for that kind of lockfs state.
     *
     * Even these selective operations should not be allowed to go through
     * if a lockfs request is in progress because that could result in inode
     * modifications during a quiesce and could hence result in inode
     * reconciliation failures. ULOCKFS_SLOCK alone would not be sufficient,
     * so make use of ufs_quiesce_pend to disallow vnode operations when a
     * quiesce is in progress.
     */
    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        if (op_cnt_incremented)
            if (!atomic_dec_ulong_nv(ctr))
                cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        error = ufs_check_lockfs(ufsvfsp, ulp, mask);
        mutex_exit(&ulp->ul_lock);
        if (error) {
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
    } else {
        /*
         * This is the common case of file system in an unlocked state.
         *
         * If a file system is unlocked, we would expect the ctr to have
         * been incremented by now. But this will not be true when a
         * quiesce is winding up - SLOCK was set when we checked before
         * incrementing the ctr, but by the time we checked for
         * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. It is okay
         * to take ul_lock and go through the slow path in this uncommon
         * case.
         */
        if (op_cnt_incremented == 0) {
            mutex_enter(&ulp->ul_lock);
            error = ufs_check_lockfs(ufsvfsp, ulp, mask);
            if (error) {
                mutex_exit(&ulp->ul_lock);
                if (ulockfs_info_free == NULL)
                    kmem_free(ulockfs_info_temp,
                        sizeof (ulockfs_info_t));
                return (error);
            }
            if (mask & ULOCKFS_FWLOCK)
                ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        } else if (mask & ULOCKFS_FWLOCK) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
/*
 * Check whether we are returning from the top level VOP.
 */
static int
ufs_lockfs_top_vop_return(ulockfs_info_t *head)
{
    ulockfs_info_t *info;
    int result = 1;

    for (info = head; info != NULL; info = info->next) {
        if (info->ulp != NULL) {
            result = 0;
            break;
        }
    }

    return (result);
}
/*
 * ufs_lockfs_end - terminate the lockfs locking protocol
 */
void
ufs_lockfs_end(struct ulockfs *ulp)
{
    ulockfs_info_t *info;
    ulockfs_info_t *head;

    /*
     * end-of-VOP protocol
     */
    if (ulp == NULL)
        return;

    head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    SEARCH_ULOCKFSP(head, ulp, info);

    /*
     * If we're called from a first level VOP, we have to have a
     * valid ulockfs record in the TSD.
     */
    ASSERT(info != NULL);

    /*
     * Invalidate the ulockfs record.
     */
    info->ulp = NULL;

    if (ufs_lockfs_top_vop_return(head))
        curthread->t_flag &= ~T_DONTBLOCK;

    /* fallocate thread */
    if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
        /* Clear the thread's fallocate state */
        info->flags &= ~ULOCK_INFO_FALLOCATE;
        if (!atomic_dec_ulong_nv(&ulp->ul_falloc_cnt)) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_CLR_FALLOC(ulp);
            cv_broadcast(&ulp->ul_cv);
            mutex_exit(&ulp->ul_lock);
        }
    } else { /* normal thread */
        if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
            cv_broadcast(&ulp->ul_cv);
    }
}
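
/*
 * A minimal sketch (editorial addition, not from the original source) of
 * how a vnode operation brackets itself with the protocol above, in the
 * style used throughout ufs_vnops.c:
 *
 *	struct ulockfs *ulp;
 *	int error;
 *
 *	error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_WRITE_MASK);
 *	if (error)
 *		return (error);
 *	... perform the operation ...
 *	if (ulp)			... NULL for recursive VOPs
 *		ufs_lockfs_end(ulp);
 */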
/*
 * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
 * blocking.
 */
int
ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
{
    int error = 0;
    int rec_vop;
    ushort_t op_cnt_incremented = 0;
    ulong_t *ctr;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }

    /*
     * First time VOP call
     *
     * Increment the ctr irrespective of the lockfs state. If the lockfs
     * state is not ULOCKFS_ULOCK, we can decrement it later. However,
     * before incrementing we need to check if there is a pending quiesce
     * request because if we have a continuous stream of ufs_lockfs_begin
     * requests pounding on a few cpu's then the ufs_quiesce thread might
     * never see the value of zero for ctr - a livelock kind of scenario.
     */
    ctr = (mask & ULOCKFS_FWLOCK) ?
        &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
    if (!ULOCKFS_IS_SLOCK(ulp)) {
        atomic_inc_ulong(ctr);
        op_cnt_incremented++;
    }

    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        /*
         * Non-blocking version of ufs_check_lockfs() code.
         *
         * If the file system is not hard locked or error locked
         * and if ulp->ul_fs_lock allows this operation, increment
         * the appropriate counter and proceed (For eg., In case the
         * file system is delete locked, a mmap can still go through).
         */
        if (op_cnt_incremented)
            if (!atomic_dec_ulong_nv(ctr))
                cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        if (ULOCKFS_IS_HLOCK(ulp) ||
            (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
            error = EIO;
        else if (ulp->ul_fs_lock & mask)
            error = EAGAIN;

        if (error) {
            mutex_exit(&ulp->ul_lock);
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
        atomic_inc_ulong(ctr);
        if (mask & ULOCKFS_FWLOCK)
            ULOCKFS_SET_FALLOC(ulp);
        mutex_exit(&ulp->ul_lock);
    } else {
        /*
         * This is the common case of file system in an unlocked state.
         *
         * If a file system is unlocked, we would expect the ctr to have
         * been incremented by now. But this will not be true when a
         * quiesce is winding up - SLOCK was set when we checked before
         * incrementing the ctr, but by the time we checked for
         * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. Take
         * ul_lock and go through the non-blocking version of
         * ufs_check_lockfs() code.
         */
        if (op_cnt_incremented == 0) {
            mutex_enter(&ulp->ul_lock);
            if (ULOCKFS_IS_HLOCK(ulp) ||
                (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
                error = EIO;
            else if (ulp->ul_fs_lock & mask)
                error = EAGAIN;

            if (error) {
                mutex_exit(&ulp->ul_lock);
                if (ulockfs_info_free == NULL)
                    kmem_free(ulockfs_info_temp,
                        sizeof (ulockfs_info_t));
                return (error);
            }
            atomic_inc_ulong(ctr);
            if (mask & ULOCKFS_FWLOCK)
                ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        } else if (mask & ULOCKFS_FWLOCK) {
            mutex_enter(&ulp->ul_lock);
            ULOCKFS_SET_FALLOC(ulp);
            mutex_exit(&ulp->ul_lock);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        if (mask & ULOCKFS_FWLOCK)
            ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
/*
 * specialized version of ufs_lockfs_begin() called by ufs_getpage().
 */
int
ufs_lockfs_begin_getpage(
    struct ufsvfs *ufsvfsp,
    struct ulockfs **ulpp,
    struct seg *seg,
    int read_access,
    uint_t *protp)
{
    ulong_t mask;
    int error;
    int rec_vop;
    struct ulockfs *ulp;
    ulockfs_info_t *ulockfs_info;
    ulockfs_info_t *ulockfs_info_free;
    ulockfs_info_t *ulockfs_info_temp;

    /*
     * file system has been forcibly unmounted
     */
    if (ufsvfsp == NULL)
        return (EIO);

    *ulpp = ulp = &ufsvfsp->vfs_ulockfs;

    /*
     * Do lockfs protocol
     */
    ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
    IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);

    /*
     * Detect recursive VOP call or handcrafted internal lockfs protocol
     * path and bail out in that case.
     */
    if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
        *ulpp = NULL;
        return (0);
    } else {
        if (ulockfs_info_free == NULL) {
            if ((ulockfs_info_temp = (ulockfs_info_t *)
                kmem_zalloc(sizeof (ulockfs_info_t),
                KM_NOSLEEP)) == NULL) {
                *ulpp = NULL;
                return (ENOMEM);
            }
        }
    }

    /*
     * First time VOP call
     */
    atomic_inc_ulong(&ulp->ul_vnops_cnt);
    if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
        if (!atomic_dec_ulong_nv(&ulp->ul_vnops_cnt))
            cv_broadcast(&ulp->ul_cv);
        mutex_enter(&ulp->ul_lock);
        if (seg->s_ops == &segvn_ops &&
            ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
            mask = (ulong_t)ULOCKFS_GETREAD_MASK;
        } else if (protp && read_access) {
            /*
             * Restrict the mapping to readonly.
             * Writes to this mapping will cause
             * another fault which will then
             * be suspended if fs is write locked
             */
            *protp &= ~PROT_WRITE;
            mask = (ulong_t)ULOCKFS_GETREAD_MASK;
        } else
            mask = (ulong_t)ULOCKFS_GETWRITE_MASK;

        /*
         * will sleep if this fs is locked against this VOP
         */
        error = ufs_check_lockfs(ufsvfsp, ulp, mask);
        mutex_exit(&ulp->ul_lock);
        if (error) {
            if (ulockfs_info_free == NULL)
                kmem_free(ulockfs_info_temp,
                    sizeof (ulockfs_info_t));
            return (error);
        }
    }

    if (ulockfs_info_free != NULL) {
        ulockfs_info_free->ulp = ulp;
    } else {
        ulockfs_info_temp->ulp = ulp;
        ulockfs_info_temp->next = ulockfs_info;
        ASSERT(ufs_lockfs_key != 0);
        (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
    }

    curthread->t_flag |= T_DONTBLOCK;
    return (0);
}
void
ufs_lockfs_tsd_destructor(void *head)
{
    ulockfs_info_t *curr = (ulockfs_info_t *)head;
    ulockfs_info_t *temp;

    for (; curr != NULL; ) {
        temp = curr;
        curr = curr->next;
        /*
         * The TSD destructor is being called when the thread exits
         * (via thread_exit()). At that time it must have cleaned up
         * all VOPs via ufs_lockfs_end(), and no valid ulockfs record
         * may still exist while a thread is exiting.
         */
        ASSERT(temp->ulp == NULL);
        kmem_free(temp, sizeof (ulockfs_info_t));
    }
}
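
/*
 * Illustrative sketch (editorial addition, not from the original source).
 * TSD keys are registered once with a destructor that runs at thread exit;
 * for this file that registration is expected to happen at file system
 * initialization time along the lines of:
 *
 *	tsd_create(&ufs_lockfs_key, ufs_lockfs_tsd_destructor);
 */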