4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
25 #include <sys/types.h>
26 #include <sys/t_lock.h>
27 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/sysmacros.h>
31 #include <sys/resource.h>
32 #include <sys/signal.h>
37 #include <sys/vnode.h>
41 #include <sys/fcntl.h>
42 #include <sys/flock.h>
43 #include <sys/atomic.h>
48 #include <sys/pathname.h>
49 #include <sys/debug.h>
50 #include <sys/vmsystm.h>
51 #include <sys/cmn_err.h>
52 #include <sys/filio.h>
55 #include <sys/fs/ufs_filio.h>
56 #include <sys/fs/ufs_lockfs.h>
57 #include <sys/fs/ufs_fs.h>
58 #include <sys/fs/ufs_inode.h>
59 #include <sys/fs/ufs_fsdir.h>
60 #include <sys/fs/ufs_quota.h>
61 #include <sys/fs/ufs_trans.h>
62 #include <sys/fs/ufs_log.h>
63 #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */
64 #include <sys/errno.h>
65 #include <sys/sysinfo.h>
72 #include <vm/seg_map.h>
73 #include <vm/seg_vn.h>
76 #include <sys/model.h>
77 #include <sys/policy.h>
79 #include "fs/fs_subr.h"
82 * ufs_fioio is the ufs equivalent of NFS_CNVT and is tailored to
83 * metamucil's needs. It may change at any time.
88 struct vnode
*vp
, /* any file on the fs */
89 struct fioio
*fiou
, /* fioio struct in userland */
90 int flag
, /* flag from VOP_IOCTL() */
91 struct cred
*cr
) /* credentials from ufs_ioctl */
94 struct vnode
*vpio
= NULL
; /* vnode for inode open */
95 struct inode
*ipio
= NULL
; /* inode for inode open */
96 struct file
*fpio
= NULL
; /* file for inode open */
97 struct inode
*ip
; /* inode for file system */
98 struct fs
*fs
; /* fs for file system */
99 STRUCT_DECL(fioio
, fio
); /* copy of user's fioio struct */
104 if (secpolicy_fs_config(cr
, vp
->v_vfsp
) != 0)
107 STRUCT_INIT(fio
, flag
& DATAMODEL_MASK
);
110 * get user's copy of fioio struct
112 if (copyin(fiou
, STRUCT_BUF(fio
), STRUCT_SIZE(fio
)))
119 * check the inode number against the fs's inode number bounds
121 if (STRUCT_FGET(fio
, fio_ino
) < UFSROOTINO
)
123 if (STRUCT_FGET(fio
, fio_ino
) >= fs
->fs_ncg
* fs
->fs_ipg
)
126 rw_enter(&ip
->i_ufsvfs
->vfs_dqrwlock
, RW_READER
);
131 error
= ufs_iget(ip
->i_vfs
, STRUCT_FGET(fio
, fio_ino
), &ipio
, cr
);
133 rw_exit(&ip
->i_ufsvfs
->vfs_dqrwlock
);
139 * check the generation number
141 rw_enter(&ipio
->i_contents
, RW_READER
);
142 if (ipio
->i_gen
!= STRUCT_FGET(fio
, fio_gen
)) {
144 rw_exit(&ipio
->i_contents
);
149 * check if the inode is free
151 if (ipio
->i_mode
== 0) {
153 rw_exit(&ipio
->i_contents
);
156 rw_exit(&ipio
->i_contents
);
159 * Adapted from copen: get a file struct
160 * Large Files: We open this file descriptor with FOFFMAX flag
161 * set so that it will be like a large file open.
163 if (falloc(NULL
, (FREAD
|FOFFMAX
), &fpio
, STRUCT_FADDR(fio
, fio_fd
)))
167 * Adapted from vn_open: check access and then open the file
170 if (error
= VOP_ACCESS(vpio
, VREAD
, 0, cr
, NULL
))
173 if (error
= VOP_OPEN(&vpio
, FREAD
, cr
, NULL
))
177 * Adapted from copen: initialize the file struct
179 fpio
->f_vnode
= vpio
;
184 if (copyout(STRUCT_BUF(fio
), fiou
, STRUCT_SIZE(fio
))) {
188 setf(STRUCT_FGET(fio
, fio_fd
), fpio
);
189 mutex_exit(&fpio
->f_tlock
);
193 * free the file struct and fd
196 setf(STRUCT_FGET(fio
, fio_fd
), NULL
);
201 * release the hold on the inode
210 * set access time w/o altering change time. This ioctl is tailored
211 * to metamucil's needs and may change at any time.
215 struct vnode
*vp
, /* file's vnode */
216 struct timeval
*tvu
, /* struct timeval in userland */
217 int flag
, /* flag from VOP_IOCTL() */
218 struct cred
*cr
) /* credentials from ufs_ioctl */
220 struct inode
*ip
; /* inode for vp */
221 struct timeval32 tv
; /* copy of user's timeval */
225 * must have sufficient privileges
227 if (secpolicy_fs_config(cr
, vp
->v_vfsp
) != 0)
231 * get user's copy of timeval struct and check values
232 * if input is NULL, will set time to now
237 if ((flag
& DATAMODEL_MASK
) == DATAMODEL_ILP32
) {
238 if (copyin(tvu
, &tv
, sizeof (tv
)))
243 if (copyin(tvu
, &tv64
, sizeof (tv64
)))
245 if (TIMEVAL_OVERFLOW(&tv64
))
247 TIMEVAL_TO_TIMEVAL32(&tv
, &tv64
);
250 if (tv
.tv_usec
< 0 || tv
.tv_usec
>= 1000000)
258 rw_enter(&ip
->i_contents
, RW_WRITER
);
261 mutex_enter(&ufs_iuniqtime_lock
);
262 ip
->i_atime
= iuniqtime
;
263 mutex_exit(&ufs_iuniqtime_lock
);
267 ip
->i_flag
|= IMODACC
;
268 rw_exit(&ip
->i_contents
);
275 * Get delayed-io state. This ioctl is tailored
276 * to metamucil's needs and may change at any time.
281 struct vnode
*vp
, /* file's vnode */
282 uint_t
*diop
, /* dio state returned here */
283 int flag
, /* flag from ufs_ioctl */
284 struct cred
*cr
) /* credentials from ufs_ioctl */
286 struct ufsvfs
*ufsvfsp
= VTOI(vp
)->i_ufsvfs
;
294 if (suword32(diop
, ufsvfsp
->vfs_dio
))
301 * Set delayed-io state. This ioctl is tailored
302 * to metamucil's needs and may change at any time.
306 struct vnode
*vp
, /* file's vnode */
307 uint_t
*diop
, /* dio flag */
308 int flag
, /* flag from ufs_ioctl */
309 struct cred
*cr
) /* credentials from ufs_ioctl */
311 uint_t dio
; /* copy of user's dio */
312 struct inode
*ip
; /* inode for vp */
313 struct ufsvfs
*ufsvfsp
;
322 /* check input conditions */
323 if (secpolicy_fs_config(cr
, vp
->v_vfsp
) != 0)
326 if (copyin(diop
, &dio
, sizeof (dio
)))
332 /* file system has been forcibly unmounted */
333 if (VTOI(vp
)->i_ufsvfs
== NULL
)
337 ufsvfsp
= ip
->i_ufsvfs
;
338 ulp
= &ufsvfsp
->vfs_ulockfs
;
340 /* logging file system; dio ignored */
341 if (TRANS_ISTRANS(ufsvfsp
))
344 /* hold the mutex to prevent race with a lockfs request */
345 vfs_lock_wait(vp
->v_vfsp
);
346 mutex_enter(&ulp
->ul_lock
);
347 atomic_inc_ulong(&ufs_quiesce_pend
);
349 if (ULOCKFS_IS_HLOCK(ulp
)) {
354 if (ULOCKFS_IS_ELOCK(ulp
)) {
358 /* wait for outstanding accesses to finish */
359 if (error
= ufs_quiesce(ulp
))
362 /* flush w/invalidate */
363 if (error
= ufs_flush(vp
->v_vfsp
))
369 mutex_enter(&ufsvfsp
->vfs_lock
);
370 ufsvfsp
->vfs_dio
= dio
;
373 * enable/disable clean flag processing
376 if (fs
->fs_ronly
== 0 &&
377 fs
->fs_clean
!= FSBAD
&&
378 fs
->fs_clean
!= FSLOG
) {
380 fs
->fs_clean
= FSSUSPEND
;
382 fs
->fs_clean
= FSACTIVE
;
383 ufs_sbwrite(ufsvfsp
);
384 mutex_exit(&ufsvfsp
->vfs_lock
);
386 mutex_exit(&ufsvfsp
->vfs_lock
);
389 * we need this broadcast because of the ufs_quiesce call above
391 atomic_dec_ulong(&ufs_quiesce_pend
);
392 cv_broadcast(&ulp
->ul_cv
);
393 mutex_exit(&ulp
->ul_lock
);
394 vfs_unlock(vp
->v_vfsp
);
399 * ufs_fioffs - ioctl handler for flushing file system
405 char *vap
, /* must be NULL - reserved */
406 struct cred
*cr
) /* credentials from ufs_ioctl */
409 struct ufsvfs
*ufsvfsp
;
412 /* file system has been forcibly unmounted */
413 ufsvfsp
= VTOI(vp
)->i_ufsvfs
;
417 ulp
= &ufsvfsp
->vfs_ulockfs
;
420 * suspend the delete thread
421 * this must be done outside the lockfs locking protocol
423 vfs_lock_wait(vp
->v_vfsp
);
424 ufs_thread_suspend(&ufsvfsp
->vfs_delete
);
426 /* hold the mutex to prevent race with a lockfs request */
427 mutex_enter(&ulp
->ul_lock
);
428 atomic_inc_ulong(&ufs_quiesce_pend
);
430 if (ULOCKFS_IS_HLOCK(ulp
)) {
434 if (ULOCKFS_IS_ELOCK(ulp
)) {
438 /* wait for outstanding accesses to finish */
439 if (error
= ufs_quiesce(ulp
))
443 * If logging, and the logmap was marked as not rollable,
444 * make it rollable now, and start the trans_roll thread and
445 * the reclaim thread. The log at this point is safe to write to.
447 if (ufsvfsp
->vfs_log
) {
448 ml_unit_t
*ul
= ufsvfsp
->vfs_log
;
449 struct fs
*fsp
= ufsvfsp
->vfs_fs
;
452 if (ul
->un_flags
& LDL_NOROLL
) {
453 ul
->un_flags
&= ~LDL_NOROLL
;
454 logmap_start_roll(ul
);
455 if (!fsp
->fs_ronly
&& (fsp
->fs_reclaim
&
456 (FS_RECLAIM
|FS_RECLAIMING
))) {
457 fsp
->fs_reclaim
&= ~FS_RECLAIM
;
458 fsp
->fs_reclaim
|= FS_RECLAIMING
;
459 ufs_thread_start(&ufsvfsp
->vfs_reclaim
,
460 ufs_thread_reclaim
, vp
->v_vfsp
);
461 if (!fsp
->fs_ronly
) {
462 TRANS_SBWRITE(ufsvfsp
,
463 TOP_SBUPDATE_UPDATE
);
465 geterror(ufsvfsp
->vfs_bufp
)) {
467 mntpt
= vfs_getmntpoint(
472 "Reclaim Status for "
473 " %s, Write failed to "
474 "update superblock, "
485 /* synchronously flush dirty data and metadata */
486 error
= ufs_flush(vp
->v_vfsp
);
489 atomic_dec_ulong(&ufs_quiesce_pend
);
490 cv_broadcast(&ulp
->ul_cv
);
491 mutex_exit(&ulp
->ul_lock
);
492 vfs_unlock(vp
->v_vfsp
);
495 * allow the delete thread to continue
497 ufs_thread_continue(&ufsvfsp
->vfs_delete
);
503 * Get number of references on this vnode.
504 * Contract-private interface for Legato's NetWorker product.
508 ufs_fioisbusy(struct vnode
*vp
, int *isbusy
, struct cred
*cr
)
513 * The caller holds one reference, there may be one in the dnlc
514 * so we need to flush it.
519 * Since we've just flushed the dnlc and we hold a reference
520 * to this vnode, then anything but 1 means busy (this had
521 * BETTER not be zero!). Also, it's possible for someone to
522 * have this file mmap'ed with no additional reference count.
524 ASSERT(vp
->v_count
> 0);
525 if ((vp
->v_count
== 1) && (VTOI(vp
)->i_mapcnt
== 0))
530 if (suword32(isbusy
, is_it_busy
))
537 ufs_fiodirectio(struct vnode
*vp
, int cmd
, struct cred
*cr
)
540 struct inode
*ip
= VTOI(vp
);
543 * Acquire reader lock and set/reset direct mode
545 rw_enter(&ip
->i_contents
, RW_READER
);
546 mutex_enter(&ip
->i_tlock
);
547 if (cmd
== DIRECTIO_ON
)
548 ip
->i_flag
|= IDIRECTIO
; /* enable direct mode */
549 else if (cmd
== DIRECTIO_OFF
)
550 ip
->i_flag
&= ~IDIRECTIO
; /* disable direct mode */
553 mutex_exit(&ip
->i_tlock
);
554 rw_exit(&ip
->i_contents
);
560 * Allow some tunables to be set on a mounted fs
563 ufs_fiotune(struct vnode
*vp
, struct fiotune
*uftp
, struct cred
*cr
)
567 struct ufsvfs
*ufsvfsp
;
570 * must have sufficient privileges
572 if (secpolicy_fs_config(cr
, vp
->v_vfsp
) != 0)
578 if (copyin(uftp
, &ftp
, sizeof (ftp
)))
582 * some minimal sanity checks
584 if ((ftp
.maxcontig
<= 0) ||
585 (ftp
.rotdelay
!= 0) ||
588 (ftp
.minfree
> 99) ||
589 ((ftp
.optim
!= FS_OPTTIME
) && (ftp
.optim
!= FS_OPTSPACE
)))
593 * update superblock but don't write it! If it gets out, fine.
597 fs
->fs_maxcontig
= ftp
.maxcontig
;
598 fs
->fs_rotdelay
= ftp
.rotdelay
;
599 fs
->fs_maxbpg
= ftp
.maxbpg
;
600 fs
->fs_minfree
= ftp
.minfree
;
601 fs
->fs_optim
= ftp
.optim
;
604 * Adjust cluster based on the new maxcontig. The cluster size
605 * can be any positive value. The check for this is done above.
607 ufsvfsp
= VTOI(vp
)->i_ufsvfs
;
608 ufsvfsp
->vfs_ioclustsz
= fs
->fs_bsize
* fs
->fs_maxcontig
;
611 * Adjust minfrags from minfree
613 ufsvfsp
->vfs_minfrags
= (int)((int64_t)fs
->fs_dsize
*
614 fs
->fs_minfree
/ 100);
617 * Write the superblock
619 if (fs
->fs_ronly
== 0) {
620 TRANS_BEGIN_ASYNC(ufsvfsp
, TOP_SBUPDATE_UPDATE
,
622 TRANS_SBWRITE(ufsvfsp
, TOP_SBUPDATE_UPDATE
);
623 TRANS_END_ASYNC(ufsvfsp
, TOP_SBUPDATE_UPDATE
, TOP_SBWRITE_SIZE
);
630 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
631 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
634 ufs_fio_holey(vnode_t
*vp
, int cmd
, offset_t
*off
)
636 inode_t
*ip
= VTOI(vp
);
637 u_offset_t noff
= (u_offset_t
)*off
; /* new offset */
642 rw_enter(&ip
->i_contents
, RW_READER
);
645 rw_exit(&ip
->i_contents
);
650 * Check for the usual case where a file has no holes.
651 * If so we can optimise to set the end of the file as the first
652 * (virtual) hole. This avoids bmap_find() searching through
653 * every block in the file for a (non-existent) hole.
655 if (!bmap_has_holes(ip
)) {
656 rw_exit(&ip
->i_contents
);
657 if (cmd
== _FIO_SEEK_HOLE
) {
661 /* *off must already point to valid data (non hole) */
666 * Calling bmap_read() one block at a time on a 1TB file takes forever,
667 * so we use a special function to search for holes or blocks.
669 if (cmd
== _FIO_SEEK_HOLE
)
673 error
= bmap_find(ip
, hole
, &noff
);
674 rw_exit(&ip
->i_contents
);
677 if (error
== ENXIO
) {
679 * Handle the virtual hole at the end of file.
681 if (cmd
== _FIO_SEEK_HOLE
) {
694 ufs_mark_compressed(struct vnode
*vp
)
696 struct inode
*ip
= VTOI(vp
);
697 struct ufsvfs
*ufsvfsp
= ip
->i_ufsvfs
;
699 if (vp
->v_type
!= VREG
)
702 rw_enter(&ip
->i_contents
, RW_WRITER
);
703 ip
->i_cflags
|= ICOMPRESS
;
704 TRANS_INODE(ufsvfsp
, ip
);
705 ip
->i_flag
|= (ICHG
|ISEQ
);
707 if (!TRANS_ISTRANS(ufsvfsp
))
708 ufs_iupdat(ip
, I_ASYNC
);
709 rw_exit(&ip
->i_contents
);