/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/t_lock.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/vfs.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_panic.h>
#include <sys/fs/ufs_bio.h>
#include <sys/fs/ufs_log.h>
#include <sys/cmn_err.h>
#include <sys/file.h>
#include <sys/debug.h>

extern kmutex_t		ufsvfs_mutex;
extern struct ufsvfs	*ufs_instances;

/*
 * hlock any file systems w/errored logs
 */
int
ufs_trans_hlock()
{
	struct ufsvfs	*ufsvfsp;
	struct lockfs	lockfs;
	int		error;
	int		retry	= 0;

	/*
	 * find fs's that paniced or have errored logging devices
	 */
	mutex_enter(&ufsvfs_mutex);
	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next) {
		/*
		 * not mounted; continue
		 */
		if ((ufsvfsp->vfs_vfs == NULL) ||
		    (ufsvfsp->vfs_validfs == UT_UNMOUNTED))
			continue;
		/*
		 * disallow unmounts (hlock occurs below)
		 */
		if (TRANS_ISERROR(ufsvfsp))
			ufsvfsp->vfs_validfs = UT_HLOCKING;
	}
	mutex_exit(&ufsvfs_mutex);

	/*
	 * hlock the fs's that paniced or have errored logging devices
	 */
again:
	mutex_enter(&ufsvfs_mutex);
	for (ufsvfsp = ufs_instances; ufsvfsp; ufsvfsp = ufsvfsp->vfs_next)
		if (ufsvfsp->vfs_validfs == UT_HLOCKING)
			break;
	mutex_exit(&ufsvfs_mutex);
	if (ufsvfsp == NULL)
		return (retry);
	/*
	 * hlock the file system
	 */
	(void) ufs_fiolfss(ufsvfsp->vfs_root, &lockfs);
	if (!LOCKFS_IS_ELOCK(&lockfs)) {
		lockfs.lf_lock = LOCKFS_HLOCK;
		lockfs.lf_flags = 0;
		lockfs.lf_comlen = 0;
		lockfs.lf_comment = NULL;
		error = ufs_fiolfs(ufsvfsp->vfs_root, &lockfs, 0);
		/*
		 * retry after awhile; another app currently doing lockfs
		 */
		if (error == EBUSY || error == EINVAL)
			retry = 1;
	} else {
		if (ufsfx_get_failure_qlen() > 0) {
			if (mutex_tryenter(&ufs_fix.uq_mutex)) {
				ufs_fix.uq_lowat = ufs_fix.uq_ne;
				cv_broadcast(&ufs_fix.uq_cv);
				mutex_exit(&ufs_fix.uq_mutex);
			}
		}
		retry = 1;
	}

	/*
	 * allow unmounts
	 */
	ufsvfsp->vfs_validfs = UT_MOUNTED;
	goto again;
}
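
/*
 * Note: ufs_trans_onerror() below is the wakeup side of the hard-lock
 * protocol above.  Raising uq_ne to the low-water mark and broadcasting
 * on uq_cv wakes the ufs_hlock service thread, which keeps calling
 * ufs_trans_hlock() until no file system remains in the UT_HLOCKING
 * state.
 */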

void
ufs_trans_onerror()
{
	mutex_enter(&ufs_hlock.uq_mutex);
	ufs_hlock.uq_ne = ufs_hlock.uq_lowat;
	cv_broadcast(&ufs_hlock.uq_cv);
	mutex_exit(&ufs_hlock.uq_mutex);
}

void
ufs_trans_sbupdate(struct ufsvfs *ufsvfsp, struct vfs *vfsp, top_t topid)
{
	if (curthread->t_flag & T_DONTBLOCK) {
		sbupdate(vfsp);
		return;
	} else {

		if (panicstr && TRANS_ISTRANS(ufsvfsp))
			return;

		curthread->t_flag |= T_DONTBLOCK;
		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
		sbupdate(vfsp);
		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBUPDATE_SIZE);
		curthread->t_flag &= ~T_DONTBLOCK;
	}
}
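
/*
 * The T_DONTBLOCK pattern above repeats in ufs_trans_iupdat() and
 * ufs_trans_sbwrite() below: when T_DONTBLOCK is already set, the
 * thread is already inside an open transaction, so the operation is
 * performed directly; otherwise the wrapper brackets the operation in
 * its own async transaction, setting T_DONTBLOCK for the duration to
 * mark the thread as being within a transaction.
 */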

void
ufs_trans_iupdat(struct inode *ip, int waitfor)
{
	struct ufsvfs	*ufsvfsp;

	if (curthread->t_flag & T_DONTBLOCK) {
		rw_enter(&ip->i_contents, RW_READER);
		ufs_iupdat(ip, waitfor);
		rw_exit(&ip->i_contents);
		return;
	} else {
		ufsvfsp = ip->i_ufsvfs;

		if (panicstr && TRANS_ISTRANS(ufsvfsp))
			return;

		curthread->t_flag |= T_DONTBLOCK;
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
		rw_enter(&ip->i_contents, RW_READER);
		ufs_iupdat(ip, waitfor);
		rw_exit(&ip->i_contents);
		TRANS_END_ASYNC(ufsvfsp, TOP_IUPDAT, TOP_IUPDAT_SIZE(ip));
		curthread->t_flag &= ~T_DONTBLOCK;
	}
}

void
ufs_trans_sbwrite(struct ufsvfs *ufsvfsp, top_t topid)
{
	if (curthread->t_flag & T_DONTBLOCK) {
		mutex_enter(&ufsvfsp->vfs_lock);
		ufs_sbwrite(ufsvfsp);
		mutex_exit(&ufsvfsp->vfs_lock);
		return;
	} else {

		if (panicstr && TRANS_ISTRANS(ufsvfsp))
			return;

		curthread->t_flag |= T_DONTBLOCK;
		TRANS_BEGIN_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
		mutex_enter(&ufsvfsp->vfs_lock);
		ufs_sbwrite(ufsvfsp);
		mutex_exit(&ufsvfsp->vfs_lock);
		TRANS_END_ASYNC(ufsvfsp, topid, TOP_SBWRITE_SIZE);
		curthread->t_flag &= ~T_DONTBLOCK;
	}
}
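
/*
 * The ufs_trans_push_* routines that follow are push callbacks: the
 * logging code invokes them, via the function registered with a delta,
 * when it needs the current copy of the superblock summary info, a
 * buffer, an inode, or a quota record written into the log.
 */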

/*ARGSUSED*/
int
ufs_trans_push_si(ufsvfs_t *ufsvfsp, delta_t dtyp, int ignore)
{
	struct fs	*fs;

	fs = ufsvfsp->vfs_fs;
	mutex_enter(&ufsvfsp->vfs_lock);
	TRANS_LOG(ufsvfsp, (char *)fs->fs_u.fs_csp,
	    ldbtob(fsbtodb(fs, fs->fs_csaddr)), fs->fs_cssize,
	    (caddr_t)fs->fs_u.fs_csp, fs->fs_cssize);
	mutex_exit(&ufsvfsp->vfs_lock);
	return (0);
}

/*ARGSUSED*/
int
ufs_trans_push_buf(ufsvfs_t *ufsvfsp, delta_t dtyp, daddr_t bno)
{
	struct buf	*bp;

	bp = (struct buf *)UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, bno, 1);
	if (bp == NULL)
		return (ENOENT);

	if (bp->b_flags & B_DELWRI) {
		/*
		 * Do not use brwrite() here since the buffer is already
		 * marked for retry or not by the code that called
		 * TRANS_BUF().
		 */
		UFS_BWRITE(ufsvfsp, bp);
		return (0);
	}
	/*
	 * If we did not find the real buf for this block above then
	 * clear the dev so the buf won't be found by mistake
	 * for this block later.  We had to allocate at least a 1 byte
	 * buffer to keep brelse happy.
	 */
	if (bp->b_bufsize == 1) {
		bp->b_dev = (o_dev_t)NODEV;
		bp->b_edev = NODEV;
		bp->b_flags = 0;
	}
	brelse(bp);
	return (ENOENT);
}

/*ARGSUSED*/
int
ufs_trans_push_inode(ufsvfs_t *ufsvfsp, delta_t dtyp, ino_t ino)
{
	int		error;
	struct inode	*ip;

	/*
	 * Grab the quota lock (if the file system has not been forcibly
	 * unmounted).
	 */
	if (ufsvfsp)
		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);

	error = ufs_iget(ufsvfsp->vfs_vfs, ino, &ip, kcred);

	if (ufsvfsp)
		rw_exit(&ufsvfsp->vfs_dqrwlock);
	if (error)
		return (ENOENT);

	if (ip->i_flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) {
		rw_enter(&ip->i_contents, RW_READER);
		ufs_iupdat(ip, 1);
		rw_exit(&ip->i_contents);
	}
	VN_RELE(ITOV(ip));
	return (0);
}

/*
 * These routines maintain the metadata map (matamap)
 */

/*
 * update the metadata map at mount
 */
static int
ufs_trans_mata_mount_scan(struct inode *ip, void *arg)
{
	/*
	 * wrong file system; keep looking
	 */
	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
		return (0);

	/*
	 * load the metadata map
	 */
	rw_enter(&ip->i_contents, RW_WRITER);
	ufs_trans_mata_iget(ip);
	rw_exit(&ip->i_contents);
	return (0);
}

void
ufs_trans_mata_mount(struct ufsvfs *ufsvfsp)
{
	struct fs	*fs	= ufsvfsp->vfs_fs;
	ino_t		ino;
	int		i;

	/*
	 * put static metadata into matamap
	 *	superblock
	 *	cylinder groups
	 *	inode groups
	 *	existing inodes
	 */
	TRANS_MATAADD(ufsvfsp, ldbtob(SBLOCK), fs->fs_sbsize);

	for (ino = i = 0; i < fs->fs_ncg; ++i, ino += fs->fs_ipg) {
		TRANS_MATAADD(ufsvfsp,
		    ldbtob(fsbtodb(fs, cgtod(fs, i))), fs->fs_cgsize);
		TRANS_MATAADD(ufsvfsp,
		    ldbtob(fsbtodb(fs, itod(fs, ino))),
		    fs->fs_ipg * sizeof (struct dinode));
	}
	(void) ufs_scan_inodes(0, ufs_trans_mata_mount_scan, ufsvfsp, ufsvfsp);
}
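
/*
 * The matamap is a checking aid: with logging enabled, the log code
 * consults it to verify that declared deltas really do cover metadata.
 * As far as the maintenance cost goes, the TRANS_MATA* macros expand
 * to these calls only on DEBUG kernels and compile away otherwise.
 */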

/*
 * clear the metadata map at umount
 */
void
ufs_trans_mata_umount(struct ufsvfs *ufsvfsp)
{
	top_mataclr(ufsvfsp);
}

/*
 * summary info (may be extended during growfs test)
 */
void
ufs_trans_mata_si(struct ufsvfs *ufsvfsp, struct fs *fs)
{
	TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, fs->fs_csaddr)),
	    fs->fs_cssize);
}

/*
 * scan an allocation block (either inode or true block)
 */
static void
ufs_trans_mata_direct(
	struct inode *ip,
	daddr_t *fragsp,
	daddr32_t *blkp,
	unsigned int nblk)
{
	int		i;
	daddr_t		frag;
	ulong_t		nb;
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;

	for (i = 0; i < nblk && *fragsp; ++i, ++blkp)
		if ((frag = *blkp) != 0) {
			if (*fragsp > fs->fs_frag) {
				nb = fs->fs_bsize;
				*fragsp -= fs->fs_frag;
			} else {
				nb = *fragsp * fs->fs_fsize;
				*fragsp = 0;
			}
			TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
		}
}

/*
 * scan an indirect allocation block (either inode or true block)
 */
static void
ufs_trans_mata_indir(
	struct inode *ip,
	daddr_t *fragsp,
	daddr_t frag,
	int level)
{
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;
	int		ne		= fs->fs_bsize / (int)sizeof (daddr32_t);
	int		i;
	struct buf	*bp;
	daddr32_t	*blkp;
	o_mode_t	ifmt		= ip->i_mode & IFMT;

	bp = UFS_BREAD(ufsvfsp, ip->i_dev, fsbtodb(fs, frag), fs->fs_bsize);
	if (bp->b_flags & B_ERROR) {
		brelse(bp);
		return;
	}
	blkp = bp->b_un.b_daddr;

	if (level || (ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod))
		ufs_trans_mata_direct(ip, fragsp, blkp, ne);

	if (level)
		for (i = 0; i < ne && *fragsp; ++i, ++blkp)
			ufs_trans_mata_indir(ip, fragsp, *blkp, level-1);
	brelse(bp);
}

/*
 * put appropriate metadata into matamap for this inode
 */
void
ufs_trans_mata_iget(struct inode *ip)
{
	int		i;
	daddr_t		frags	= dbtofsb(ip->i_fs, ip->i_blocks);
	o_mode_t	ifmt	= ip->i_mode & IFMT;

	if (frags && ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
		ufs_trans_mata_direct(ip, &frags, &ip->i_db[0], NDADDR);

	if (frags)
		ufs_trans_mata_direct(ip, &frags, &ip->i_ib[0], NIADDR);

	for (i = 0; i < NIADDR && frags; ++i)
		if (ip->i_ib[i])
			ufs_trans_mata_indir(ip, &frags, ip->i_ib[i], i);
}

/*
 * freeing possible metadata (block of user data)
 */
void
ufs_trans_mata_free(struct ufsvfs *ufsvfsp, offset_t mof, off_t nb)
{
	top_matadel(ufsvfsp, mof, nb);
}

/*
 * allocating metadata
 */
void
ufs_trans_mata_alloc(
	struct ufsvfs *ufsvfsp,
	struct inode *ip,
	daddr_t frag,
	ulong_t nb,
	int indir)
{
	struct fs	*fs	= ufsvfsp->vfs_fs;
	o_mode_t	ifmt	= ip->i_mode & IFMT;

	if (indir || ((ifmt == IFDIR) || (ifmt == IFSHAD) ||
	    (ifmt == IFATTRDIR) || (ip == ip->i_ufsvfs->vfs_qinod)))
		TRANS_MATAADD(ufsvfsp, ldbtob(fsbtodb(fs, frag)), nb);
}

/*
 * ufs_trans_dir is used to declare a directory delta
 */
int
ufs_trans_dir(struct inode *ip, off_t offset)
{
	daddr_t	bn;
	int	contig = 0, error;

	ASSERT(ip);
	ASSERT(RW_WRITE_HELD(&ip->i_contents));
	error = bmap_read(ip, (u_offset_t)offset, &bn, &contig);
	if (error || (bn == UFS_HOLE)) {
		cmn_err(CE_WARN, "ufs_trans_dir - could not get block"
		    " number error = %d bn = %d\n", error, (int)bn);
		if (error == 0)	/* treat UFS_HOLE as an I/O error */
			error = EIO;
		return (error);
	}
	TRANS_DELTA(ip->i_ufsvfs, ldbtob(bn), DIRBLKSIZ, DT_DIR, 0, 0);
	return (error);
}

/*ARGSUSED*/
int
ufs_trans_push_quota(ufsvfs_t *ufsvfsp, delta_t dtyp, struct dquot *dqp)
{
	/*
	 * Lock the quota subsystem (ufsvfsp can be NULL
	 * if the DQ_ERROR is set).
	 */
	if (ufsvfsp)
		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	mutex_enter(&dqp->dq_lock);

	/*
	 * If this transaction has been cancelled by closedq_scan_inode(),
	 * then bail out now.  We don't call dqput() in this case because
	 * it has already been done.
	 */
	if ((dqp->dq_flags & DQ_TRANS) == 0) {
		mutex_exit(&dqp->dq_lock);
		if (ufsvfsp)
			rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (0);
	}

	if (dqp->dq_flags & DQ_ERROR) {
		/*
		 * Paranoia to make sure that there is at least one
		 * reference to the dquot struct.  We are done with
		 * the dquot (due to an error) so clear logging
		 * specific markers.
		 */
		ASSERT(dqp->dq_cnt >= 1);
		dqp->dq_flags &= ~DQ_TRANS;
		dqput(dqp);
		mutex_exit(&dqp->dq_lock);
		if (ufsvfsp)
			rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (1);
	}

	if (dqp->dq_flags & (DQ_MOD | DQ_BLKS | DQ_FILES)) {
		ASSERT((dqp->dq_mof != UFS_HOLE) && (dqp->dq_mof != 0));
		TRANS_LOG(ufsvfsp, (caddr_t)&dqp->dq_dqb,
		    dqp->dq_mof, (int)sizeof (struct dqblk), NULL, 0);
		/*
		 * Paranoia to make sure that there is at least one
		 * reference to the dquot struct.  Clear the
		 * modification flag because the operation is now in
		 * the log.  Also clear the logging specific markers
		 * that were set in ufs_trans_quota().
		 */
		ASSERT(dqp->dq_cnt >= 1);
		dqp->dq_flags &= ~(DQ_MOD | DQ_TRANS);
		dqput(dqp);
	}

	/*
	 * At this point, the logging specific flag should be clear,
	 * but add paranoia just in case something has gone wrong.
	 */
	ASSERT((dqp->dq_flags & DQ_TRANS) == 0);
	mutex_exit(&dqp->dq_lock);
	if (ufsvfsp)
		rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (0);
}

/*
 * ufs_trans_quota takes in a uid, allocates the disk space, placing the
 * quota record into the metamap, then declares the delta.
 */
void
ufs_trans_quota(struct dquot *dqp)
{
	struct inode	*qip = dqp->dq_ufsvfsp->vfs_qinod;

	ASSERT(qip);
	ASSERT(MUTEX_HELD(&dqp->dq_lock));
	ASSERT(dqp->dq_flags & DQ_MOD);
	ASSERT(dqp->dq_mof != 0);
	ASSERT(dqp->dq_mof != UFS_HOLE);

	/*
	 * Mark this dquot to indicate that we are starting a logging
	 * file system operation for this dquot.  Also increment the
	 * reference count so that the dquot does not get reused while
	 * it is on the mapentry_t list.  DQ_TRANS is cleared and the
	 * reference count is decremented by ufs_trans_push_quota().
	 *
	 * If the file system is force-unmounted while there is a
	 * pending quota transaction, then closedq_scan_inode() will
	 * clear the DQ_TRANS flag and decrement the reference count.
	 *
	 * Since deltamap_add() drops multiple transactions to the
	 * same dq_mof and ufs_trans_push_quota() won't get called,
	 * we use DQ_TRANS to prevent repeat transactions from
	 * incrementing the reference count (or calling TRANS_DELTA()).
	 */
	if ((dqp->dq_flags & DQ_TRANS) == 0) {
		dqp->dq_flags |= DQ_TRANS;
		dqp->dq_cnt++;
		TRANS_DELTA(qip->i_ufsvfs, dqp->dq_mof, sizeof (struct dqblk),
		    DT_QR, ufs_trans_push_quota, (ulong_t)dqp);
	}
}

void
ufs_trans_dqrele(struct dquot *dqp)
{
	struct ufsvfs	*ufsvfsp = dqp->dq_ufsvfsp;

	curthread->t_flag |= T_DONTBLOCK;
	TRANS_BEGIN_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	dqrele(dqp);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	TRANS_END_ASYNC(ufsvfsp, TOP_QUOTA, TOP_QUOTA_SIZE);
	curthread->t_flag &= ~T_DONTBLOCK;
}

int ufs_trans_max_resv = TOP_MAX_RESV;	/* will be adjusted for testing */
long ufs_trans_avgbfree = 0;		/* will be adjusted for testing */
#define	TRANS_MAX_WRITE	(1024 * 1024)
size_t ufs_trans_max_resid = TRANS_MAX_WRITE;
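
/*
 * ufs_trans_max_resv bounds the log space a single transaction may
 * reserve, and ufs_trans_max_resid bounds the byte count attempted in
 * one chunk of a logged write; requests that would exceed either bound
 * are split into pieces by the *_resv routines below.
 */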

/*
 * Calculate the log reservation for the given write or truncate
 */
static ulong_t
ufs_log_amt(struct inode *ip, offset_t offset, ssize_t resid, int trunc)
{
	long		ncg, last2blk;
	long		niblk		= 0;
	u_offset_t	writeend, offblk;
	int		resv;
	daddr_t		nblk, maxfblk;
	long		avgbfree;
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;
	long		fni		= NINDIR(fs);
	int		bsize		= fs->fs_bsize;

	/*
	 * Assume that the request will fit in 1 or 2 cg's,
	 * resv is the amount of log space to reserve (in bytes).
	 */
	resv = SIZECG(ip) * 2 + INODESIZE + 1024;

	/*
	 * get max position of write in fs blocks
	 */
	writeend = offset + resid;
	maxfblk = lblkno(fs, writeend);
	offblk = lblkno(fs, offset);
	/*
	 * request size in fs blocks
	 */
	nblk = lblkno(fs, blkroundup(fs, resid));
	/*
	 * Adjust for sparse files
	 */
	if (trunc)
		nblk = MIN(nblk, ip->i_blocks);

	/*
	 * Adjust avgbfree (for testing)
	 */
	avgbfree = (ufs_trans_avgbfree) ? 1 : ufsvfsp->vfs_avgbfree + 1;

	/*
	 * Calculate maximum number of blocks of triple indirect
	 * pointers to write.
	 */
	last2blk = NDADDR + fni + fni * fni;
	if (maxfblk > last2blk) {
		long	nl2ptr;
		long	n3blk;

		if (offblk > last2blk)
			n3blk = maxfblk - offblk;
		else
			n3blk = maxfblk - last2blk;
		niblk += roundup(n3blk * sizeof (daddr_t), bsize) / bsize + 1;
		nl2ptr = roundup(niblk, fni) / fni + 1;
		niblk += roundup(nl2ptr * sizeof (daddr_t), bsize) / bsize + 2;
		maxfblk -= n3blk;
	}
	/*
	 * calculate maximum number of blocks of double indirect
	 * pointers to write.
	 */
	if (maxfblk > NDADDR + fni) {
		long	n2blk;

		if (offblk > NDADDR + fni)
			n2blk = maxfblk - offblk;
		else
			n2blk = maxfblk - NDADDR + fni;
		niblk += roundup(n2blk * sizeof (daddr_t), bsize) / bsize + 2;
		maxfblk -= n2blk;
	}
	/*
	 * Add in indirect pointer block write
	 */
	if (maxfblk > NDADDR) {
		niblk += 1;
	}
	/*
	 * Calculate deltas for indirect pointer writes
	 */
	resv += niblk * (fs->fs_bsize + sizeof (struct delta));
	/*
	 * maximum number of cg's needed for request
	 */
	ncg = nblk / avgbfree;
	if (ncg > fs->fs_ncg)
		ncg = fs->fs_ncg;

	/*
	 * maximum amount of log space needed for request
	 */
	if (ncg > 2)
		resv += (ncg - 2) * SIZECG(ip);

	return (resv);
}
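
/*
 * In rough terms, the reservation computed above is: a base amount
 * that assumes the data lands in at most two cylinder groups, plus one
 * file system block and one delta header for each indirect-pointer
 * block the request may dirty (niblk), plus one SIZECG unit for every
 * cylinder group beyond the first two that the request is expected to
 * touch (estimated as nblk / avgbfree).
 */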

/*
 * Calculate the amount of log space that needs to be reserved for this
 * trunc request.  If the amount of log space is too large, then
 * calculate the size that the requests needs to be split into.
 */
void
ufs_trans_trunc_resv(
	struct inode *ip,
	u_offset_t length,
	int *resvp,
	u_offset_t *residp)
{
	ulong_t		resv;
	u_offset_t	size, offset, resid;
	int		nchunks, flag;

	/*
	 *    *resvp is the amount of log space to reserve (in bytes).
	 *    when nonzero, *residp is the number of bytes to truncate.
	 */
	*residp = 0;

	if (length < ip->i_size) {
		size = ip->i_size - length;
	} else {
		resv = SIZECG(ip) * 2 + INODESIZE + 1024;
		/*
		 * truncate up, doesn't really use much space,
		 * the default above should be sufficient.
		 */
		goto done;
	}

	offset = length;
	resid = size;
	nchunks = 1;
	flag = 0;

	/*
	 * If this request takes too much log space, it will be split into
	 * "nchunks".  If this split is not enough, linearly increment the
	 * nchunks in the next iteration.
	 */
	for (; (resv = ufs_log_amt(ip, offset, resid, 1)) > ufs_trans_max_resv;
	    offset = length + (nchunks - 1) * resid) {
		if (!flag) {
			nchunks = roundup(resv, ufs_trans_max_resv) /
			    ufs_trans_max_resv;
			flag = 1;
		} else {
			nchunks++;
		}
		resid = size / nchunks;
	}

	if (nchunks > 1)
		*residp = resid;
done:
	*resvp = resv;
}
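
/*
 * When *residp comes back nonzero, ufs_trans_itrunc() below shortens
 * the file in steps of resid bytes, opening a fresh transaction for
 * each step, so that no single truncate transaction reserves more than
 * ufs_trans_max_resv bytes of log space.
 */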

int
ufs_trans_itrunc(struct inode *ip, u_offset_t length, int flags, cred_t *cr)
{
	int		err, issync, resv;
	u_offset_t	resid;
	int		do_block	= 0;
	struct ufsvfs	*ufsvfsp	= ip->i_ufsvfs;
	struct fs	*fs		= ufsvfsp->vfs_fs;

	/*
	 * Not logging; just do the trunc
	 */
	if (!TRANS_ISTRANS(ufsvfsp)) {
		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
		rw_enter(&ip->i_contents, RW_WRITER);
		err = ufs_itrunc(ip, length, flags, cr);
		rw_exit(&ip->i_contents);
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (err);
	}

	/*
	 * within the lockfs protocol but *not* part of a transaction
	 */
	do_block = curthread->t_flag & T_DONTBLOCK;
	curthread->t_flag |= T_DONTBLOCK;

	/*
	 * Trunc the file (in pieces, if necessary)
	 */
again:
	ufs_trans_trunc_resv(ip, length, &resv, &resid);
	TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_ITRUNC, resv);
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	if (resid) {
		/*
		 * resid is only set if we have to truncate in chunks
		 */
		ASSERT(length + resid < ip->i_size);

		/*
		 * Partially trunc file down to desired size (length).
		 * Only retain I_FREE on the last partial trunc.
		 * Round up size to a block boundary, to ensure the truncate
		 * doesn't have to allocate blocks.  This is done both for
		 * performance and to fix a bug where if the block can't be
		 * allocated then the inode delete fails, but the inode
		 * is still freed with attached blocks and non-zero size.
		 */
		err = ufs_itrunc(ip, blkroundup(fs, (ip->i_size - resid)),
		    flags & ~I_FREE, cr);
		ASSERT(ip->i_size != length);
	} else
		err = ufs_itrunc(ip, length, flags, cr);

	if (!do_block)
		curthread->t_flag &= ~T_DONTBLOCK;
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_ITRUNC, resv);

	if ((err == 0) && resid) {
		ufsvfsp->vfs_avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		goto again;
	}
	return (err);
}

/*
 * Calculate the amount of log space that needs to be reserved for this
 * write request.  If the amount of log space is too large, then
 * calculate the size that the requests needs to be split into.
 * First try fixed chunks of size ufs_trans_max_resid.  If that
 * is too big, iterate down to the largest size that will fit.
 * Pagein the pages in the first chunk here, so that the pagein is
 * avoided later when the transaction is open.
 */
void
ufs_trans_write_resv(
	struct inode *ip,
	struct uio *uio,
	int *resvp,
	int *residp)
{
	ulong_t		resv;
	offset_t	offset;
	ssize_t		resid;
	int		nchunks;

	*residp = 0;
	offset = uio->uio_offset;
	resid = MIN(uio->uio_resid, ufs_trans_max_resid);
	resv = ufs_log_amt(ip, offset, resid, 0);
	if (resv <= ufs_trans_max_resv) {
		uio_prefaultpages(resid, uio);
		if (resid != uio->uio_resid)
			*residp = resid;
		*resvp = resv;
		return;
	}

	resid = uio->uio_resid;
	nchunks = 1;
	for (; (resv = ufs_log_amt(ip, offset, resid, 0)) > ufs_trans_max_resv;
	    offset = uio->uio_offset + (nchunks - 1) * resid) {
		nchunks++;
		resid = uio->uio_resid / nchunks;
	}

	uio_prefaultpages(resid, uio);

	/*
	 * If this request takes too much log space, it will be split
	 */
	if (nchunks > 1)
		*residp = resid;
	*resvp = resv;
}
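
/*
 * Sketch of the expected caller sequence for the async case (compare
 * ufs_write() in ufs_vnops.c; details there may differ):
 *
 *	ufs_trans_write_resv(ip, uio, &resv, &resid);
 *	TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv);
 *	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
 *	rw_enter(&ip->i_contents, RW_WRITER);
 *	if (resid)
 *		error = ufs_trans_write(ip, uio, ioflag, cr, resv, resid);
 *	else
 *		error = wrip(ip, uio, ioflag, cr);
 *	rw_exit(&ip->i_contents);
 *	rw_exit(&ufsvfsp->vfs_dqrwlock);
 *	TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv);
 */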

/*
 * Issue write request.
 *
 * Split a large request into smaller chunks.
 */
int
ufs_trans_write(
	struct inode *ip,
	struct uio *uio,
	int ioflag,
	cred_t *cr,
	int resv,
	long resid)
{
	long		realresid;
	int		err;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;

	/*
	 * since the write is too big and would "HOG THE LOG" it needs to
	 * be broken up and done in pieces.  NOTE, the caller will
	 * issue the EOT after the request has been completed
	 */
	realresid = uio->uio_resid;

again:
	/*
	 * Perform partial request (uiomove will update uio for us)
	 *	Request is split up into "resid" size chunks until
	 *	"realresid" bytes have been transferred.
	 */
	uio->uio_resid = MIN(resid, realresid);
	realresid -= uio->uio_resid;
	err = wrip(ip, uio, ioflag, cr);

	/*
	 * Error or request is done; caller issues final EOT
	 */
	if (err || uio->uio_resid || (realresid == 0)) {
		uio->uio_resid += realresid;
		return (err);
	}

	/*
	 * Generate EOT for this part of the request
	 */
	rw_exit(&ip->i_contents);
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	if (ioflag & (FSYNC|FDSYNC)) {
		TRANS_END_SYNC(ufsvfsp, err, TOP_WRITE_SYNC, resv);
	} else {
		TRANS_END_ASYNC(ufsvfsp, TOP_WRITE, resv);
	}

	/*
	 * Make sure the input buffer is resident before starting
	 * the next transaction.
	 */
	uio_prefaultpages(MIN(resid, realresid), uio);

	/*
	 * Generate BOT for next part of the request
	 */
	if (ioflag & (FSYNC|FDSYNC)) {
		int	error;

		TRANS_BEGIN_SYNC(ufsvfsp, TOP_WRITE_SYNC, resv, error);
		ASSERT(!error);
	} else {
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_WRITE, resv);
	}
	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);
	/*
	 * Error during EOT (probably device error while writing commit rec)
	 */