/*	$NetBSD: lfs_inode.c,v 1.147 2015/09/01 06:13:09 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_inode.c	8.9 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.147 2015/09/01 06:13:09 dholland Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_quotacommon.h>
#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_extern.h>
#include <ufs/lfs/lfs_kernel.h>

static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t);
static int lfs_indirtrunc(struct inode *, daddr_t, daddr_t,
			  daddr_t, int, daddr_t *, daddr_t *,
			  long *, size_t *);
static int lfs_blkfree(struct lfs *, struct inode *, daddr_t, size_t, long *, size_t *);
static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int);

/* Search a block for a specific dinode. */
union lfs_dinode *
lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp)
{
	union lfs_dinode *ldip;
	unsigned num, i;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Read the inode block backwards, since later versions of the
	 * inode will supersede earlier ones.  Though it is unlikely, it is
	 * possible that the same inode will appear in the same inode block.
	 */
	num = LFS_INOPB(fs);
	for (i = num; i-- > 0; ) {
		ldip = DINO_IN_BLOCK(fs, bp->b_data, i);
		if (lfs_dino_getinumber(fs, ldip) == ino)
			return (ldip);
	}

	printf("searched %u entries for %ju\n", num, (uintmax_t)ino);
	printf("offset is 0x%jx (seg %d)\n", (uintmax_t)lfs_sb_getoffset(fs),
	       lfs_dtosn(fs, lfs_sb_getoffset(fs)));
	printf("block is 0x%jx (seg %d)\n",
	       (uintmax_t)LFS_DBTOFSB(fs, bp->b_blkno),
	       lfs_dtosn(fs, LFS_DBTOFSB(fs, bp->b_blkno)));

	return NULL;
}
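
/*
 * Illustrative note (editorial, not from the original source): the
 * backwards scan above means that if an inode number somehow appears
 * twice in one inode block, the copy at the higher index, i.e. the
 * one written later, is the one returned.  For example, if
 * LFS_INOPB(fs) were 64, the probe order would be 63, 62, ..., 0.
 * The fan-out value is only an example; the real one comes from the
 * superblock geometry.
 */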

int
lfs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, int updflags)
{
	struct inode *ip;
	struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs;
	int flags;

	ASSERT_NO_SEGLOCK(fs);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);

	/*
	 * If we are called from vinvalbuf, and the file's blocks have
	 * already been scheduled for writing, but the writes have not
	 * yet completed, lfs_vflush will not be called, and vinvalbuf
	 * will cause a panic.  So, we must wait until any pending write
	 * for our inode completes, if we are called with UPDATE_WAIT set.
	 */
	mutex_enter(vp->v_interlock);
	while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT &&
	    WRITEINPROG(vp)) {
		DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d"
		      " (in progress)\n", ip->i_number));
		cv_wait(&vp->v_cv, vp->v_interlock);
	}
	mutex_exit(vp->v_interlock);
	LFS_ITIMES(ip, acc, mod, NULL);
	if (updflags & UPDATE_CLOSE)
		flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING);
	else
		flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING);
	if (flags == 0)
		return (0);

	/* If sync, push back the vnode and any dirty blocks it may have. */
	if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) {
		/* Avoid flushing VU_DIROP. */
		mutex_enter(&lfs_lock);
		++fs->lfs_diropwait;
		while (vp->v_uflag & VU_DIROP) {
			DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d"
			      " (dirops)\n", ip->i_number));
			DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags"
			      " 0x%x\n",
			      vp->v_iflag | vp->v_vflag | vp->v_uflag,
			      ip->i_flag));
			if (fs->lfs_dirops == 0)
				lfs_flush_fs(fs, SEGM_SYNC);
			else
				mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync",
					0, &lfs_lock);
			/* XXX KS - by falling out here, are we writing the vn
			   twice? */
		}
		--fs->lfs_diropwait;
		mutex_exit(&lfs_lock);
		return lfs_vflush(vp);
	}
	return 0;
}
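
/*
 * Editorial summary, derived from the code above rather than any
 * external documentation: UPDATE_WAIT alone requests a synchronous
 * update, ending in lfs_vflush(); UPDATE_WAIT together with
 * UPDATE_DIROP deliberately skips that flush, since dirop vnodes are
 * written by the segment writer instead; UPDATE_CLOSE additionally
 * treats IN_ACCESSED as a reason to write the inode.  A caller
 * wanting a synchronous push of a plain inode might therefore use
 * lfs_update(vp, NULL, NULL, UPDATE_WAIT).
 */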

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */

/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
/* VOP_BWRITE 1 + ULFS_NIADDR + lfs_balloc == 2 + 2*ULFS_NIADDR times */
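
/*
 * Worked example (editorial, illustrative numbers only): with 8 KB
 * blocks, ULFS_NDADDR == 12 and LFS_NINDIR(fs) == 1024, truncating to
 * length 100000 keeps lastblock = 12, so lastiblock[SINGLE] becomes 0,
 * lastiblock[DOUBLE] becomes -1024 and lastiblock[TRIPLE] is more
 * negative still: only the single indirect block keeps live pointers,
 * and the double and triple indirect trees are freed outright.  The
 * indirect blocks themselves live at negative logical block numbers
 * (indir_lbn[SINGLE] == -ULFS_NDADDR and so on, computed below), which
 * keeps each indirect tree's address range disjoint from the data
 * blocks.  The real fan-out comes from the on-disk superblock.
 */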
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
	/* note: newblks is set but only actually used if DIAGNOSTIC */
	daddr_t newblks[ULFS_NDADDR + ULFS_NIADDR] __diagused;
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	daddr_t count, rcount;
	daddr_t blocksreleased = 0, real_released = 0;
	int i, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;

	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return and do not update modification times.
	 */
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		return 0;
	}

	fs = oip->i_lfs;

	if (ovp->v_type == VLNK &&
	    (oip->i_size < fs->um_maxsymlinklen ||
	     (fs->um_maxsymlinklen == 0 &&
	      lfs_dino_getblocks(fs, oip->i_din) == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = 0;
		lfs_dino_setsize(fs, oip->i_din, 0);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	if (oip->i_size == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	lfs_imtime(fs);
	osize = oip->i_size;
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
			    lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
			    lfs_blkroundup(fs, osize) != osize) {
				off_t eob;

				eob = lfs_blkroundup(fs, osize);
				uvm_vnp_setwritesize(ovp, eob);
				error = ulfs_balloc_range(ovp, osize,
				    eob - osize, cred, aflags);
				if (error) {
					(void) lfs_truncate(ovp, osize,
					    ioflag & IO_SYNC, cred);
					return error;
				}
				if (ioflag & IO_SYNC) {
					mutex_enter(ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & lfs_sb_getbmask(fs)),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			uvm_vnp_setwritesize(ovp, length);
			error = ulfs_balloc_range(ovp, length - 1, 1, cred,
			    aflags);
			if (error) {
				(void) lfs_truncate(ovp, osize,
				    ioflag & IO_SYNC, cred);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		} else {
			error = lfs_reserve(fs, ovp, NULL,
			    lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			error = lfs_balloc(ovp, length - 1, 1, cred,
			    aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			oip->i_size = length;
			lfs_dino_setsize(fs, oip->i_din, oip->i_size);
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp->b_vp, bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		}
	}

	if ((error = lfs_reserve(fs, ovp, NULL,
	    lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0)
		return (error);

	/*
	 * Shorten the size of the file.  If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.  Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = lfs_blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	if (ovp != fs->lfs_ivnode)
		lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		oip->i_size = length;
		lfs_dino_setsize(fs, oip->i_din, oip->i_size);
	} else if (!usepc) {
		lbn = lfs_lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		obufsize = bp->b_bufsize;
		odb = lfs_btofsb(fs, bp->b_bcount);
		oip->i_size = length;
		lfs_dino_setsize(fs, oip->i_din, oip->i_size);
		size = lfs_blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
			mutex_enter(&lfs_lock);
			locked_queue_bytes -= obufsize - bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		if (bp->b_oflags & BO_DELWRI) {
			lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
			/* XXX shouldn't this wake up on lfs_availsleep? */
		}
		(void) VOP_BWRITE(bp->b_vp, bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of last block which is past
		 * the new EOF.  We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t xlbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		xlbn = lfs_lblkno(fs, length);
		size = lfs_blksize(fs, oip, xlbn);
		eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_UNMAP_FLAG(ovp));
		if (round_page(eoz) > round_page(length)) {
			mutex_enter(ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
				    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
				goto errout;
			}
		}
	}

	genfs_node_wrlock(ovp);

	oip->i_size = length;
	lfs_dino_setsize(fs, oip->i_din, oip->i_size);
	uvm_vnp_setsize(ovp, length);

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	/* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
	if (length > QUAD_MAX - lfs_sb_getbsize(fs))
		lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs));
	else
		lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
	lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	for (i = 0; i < ULFS_NDADDR; i++) {
		newblks[i] = lfs_dino_getdb(fs, oip->i_din, i);
	}
	for (i = 0; i < ULFS_NIADDR; i++) {
		newblks[ULFS_NDADDR + i] = lfs_dino_getib(fs, oip->i_din, i);
	}
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[ULFS_NDADDR + level] = 0;
			lastiblock[level] = -1;
		}
	for (i = ULFS_NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	oip->i_size = osize;
	lfs_dino_setsize(fs, oip->i_din, oip->i_size);
	error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -ULFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = lfs_dino_getib(fs, oip->i_din, level);
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				if (lfs_dino_getib(fs, oip->i_din, level) > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				lfs_dino_setib(fs, oip->i_din, level, 0);
				lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs),
					    &lastseg, &bc);
				lfs_deregister_block(ovp, bn);
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = lfs_dino_getdb(fs, oip->i_din, i);
		if (bn == 0)
			continue;
		bsize = lfs_blksize(fs, oip, i);
		if (lfs_dino_getdb(fs, oip->i_din, i) > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += lfs_btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += lfs_btofsb(fs, bsize);
		lfs_dino_setdb(fs, oip->i_din, i, 0);
		lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
		lfs_deregister_block(ovp, bn);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = lfs_dino_getdb(fs, oip->i_din, lastblock);
	if (bn != 0) {
		long oldspace, newspace;
		long olddspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = lfs_blksize(fs, oip, lastblock);
		olddspace = oip->i_lfs_fragsize[lastblock];

		oip->i_size = length;
		lfs_dino_setsize(fs, oip->i_din, oip->i_size);
		newspace = lfs_blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += lfs_btofsb(fs, oldspace - newspace);
		}
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += lfs_btofsb(fs, olddspace - newspace);
		}
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[ULFS_NDADDR + level] == 0) !=
		    (lfs_dino_getib(fs, oip->i_din, level) == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < ULFS_NDADDR; i++)
		if ((newblks[i] == 0) !=
		    (lfs_dino_getdb(fs, oip->i_din, i) == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */

	/*
	 * Put back the real size.
	 */
	oip->i_size = length;
	lfs_dino_setsize(fs, oip->i_din, oip->i_size);
	oip->i_lfs_effnblks -= blocksreleased;
	lfs_dino_setblocks(fs, oip->i_din,
	    lfs_dino_getblocks(fs, oip->i_din) - real_released);
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, blocksreleased);
	mutex_exit(&lfs_lock);
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (lfs_dino_getblocks(fs, oip->i_din) != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %jd blks/%jd effblks\n",
		       (intmax_t)lfs_dino_getblocks(fs, oip->i_din),
		       (intmax_t)oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif

	/*
	 * If we truncated to zero, take us off the paging queue.
	 */
	mutex_enter(&lfs_lock);
	if (oip->i_size == 0 && oip->i_flags & IN_PAGING) {
		oip->i_flags &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
	}
	mutex_exit(&lfs_lock);

	oip->i_flag |= IN_CHANGE;
#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
	(void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
	genfs_node_unlock(ovp);
errout:
	oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	if (ovp != fs->lfs_ivnode)
		lfs_segunlock(fs);
	return (allerror ? allerror : error);
}
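
/*
 * Editorial note on the bookkeeping above: lfs_truncate maintains two
 * block counts that can legitimately differ while a segment write is
 * pending.  blocksreleased is subtracted from i_lfs_effnblks, the
 * in-core "effective" count covering blocks the inode will own once
 * everything reaches disk, while real_released is subtracted from the
 * on-disk dinode's block count, covering only blocks that had actually
 * been written.  When the two disagree, the writer and cleaner can
 * tell that a truncated file still has stale blocks in flight.
 */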

/* Update segment and avail usage information when removing a block. */
static int
lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr,
	    size_t bsize, long *lastseg, size_t *num)
{
	long seg;
	int error = 0;

	ASSERT_SEGLOCK(fs);
	bsize = lfs_fragroundup(fs, bsize);
	if (daddr > 0) {
		if (*lastseg != (seg = lfs_dtosn(fs, daddr))) {
			error = lfs_update_seguse(fs, ip, *lastseg, *num);
			*num = bsize;
			*lastseg = seg;
		} else
			*num += bsize;
	}

	return error;
}

/* Finish the accounting updates for a segment. */
static int
lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num)
{
	struct segdelta *sd;

	ASSERT_SEGLOCK(fs);
	if (lastseg < 0 || num == 0)
		return 0;

	LIST_FOREACH(sd, &ip->i_lfs_segdhd, list)
		if (sd->segnum == lastseg)
			break;
	if (sd == NULL) {
		sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK);
		sd->segnum = lastseg;
		sd->num = 0;
		LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list);
	}
	sd->num += num;

	return 0;
}
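
/*
 * Editorial note, derived from the two functions above: freed bytes
 * are not written back to the segment usage table one block at a
 * time.  lfs_blkfree() batches consecutive frees that land in the
 * same segment, and lfs_update_seguse() parks each per-segment total
 * in a struct segdelta hung off the inode.  lfs_finalize_seguse()
 * below later applies those deltas to the SEGUSE entries and frees
 * them, all under the segment lock.
 */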

static void
lfs_finalize_seguse(struct lfs *fs, void *v)
{
	struct segdelta *sd;
	struct buf *bp;
	SEGUSE *sup;
	LIST_HEAD(, segdelta) *hd = v;

	ASSERT_SEGLOCK(fs);
	while ((sd = LIST_FIRST(hd)) != NULL) {
		LIST_REMOVE(sd, list);
		LFS_SEGENTRY(sup, fs, sd->segnum, bp);
		if (sd->num > sup->su_nbytes) {
			printf("lfs_finalize_seguse: segment %ld short by %ld\n",
			       sd->segnum, (long)(sd->num - sup->su_nbytes));
			panic("lfs_finalize_seguse: negative bytes");
			sup->su_nbytes = sd->num;
		}
		sup->su_nbytes -= sd->num;
		LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp);
		free(sd, M_SEGMENT);
	}
}

/* Finish the accounting updates for an inode. */
void
lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip)
{
	lfs_finalize_seguse(fs, &ip->i_lfs_segdhd);
}

/* Finish the accounting updates for the whole filesystem. */
void
lfs_finalize_fs_seguse(struct lfs *fs)
{
	lfs_finalize_seguse(fs, &fs->lfs_segdhd);
}

/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
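
/*
 * Editorial example of the recursion, with illustrative numbers: for a
 * double indirect block (level == DOUBLE), factor below becomes
 * LFS_NINDIR(fs), so each pointer slot i spans factor logical blocks.
 * Slots that lie entirely past the truncation point recurse with
 * lastbn == -1, meaning "free everything underneath", while the one
 * partially live slot recurses with lastbn % factor.  If LFS_NINDIR(fs)
 * were 1024, slot 3 of a double indirect block would cover 1024
 * logical blocks.
 */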
static int
lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
	       daddr_t lastbn, int level, daddr_t *countp,
	       daddr_t *rcountp, long *lastsegp, size_t *bcp)
{
	int i;
	struct buf *bp;
	struct lfs *fs = ip->i_lfs;
	void *bap;
	bool bap_needs_free;
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	daddr_t blkcount, rblkcount, factor;
	int nblocks;
	daddr_t blocksreleased = 0, real_released = 0;
	int error = 0, allerror = 0;

	ASSERT_SEGLOCK(fs);
	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= LFS_NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the b_blkno field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0);
	if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn);
		curlwp->l_ru.ru_inblock++;	/* pay for read */
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("lfs_indirtrunc: bad buffer size");
		bp->b_blkno = LFS_FSBTODB(fs, dbn);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp, 0);
		*countp = *rcountp = 0;
		return (error);
	}

	bap = bp->b_data;
	if (lastbn >= 0) {
		/*
		 * We still need this block, so copy the data for
		 * subsequent processing; then in the original block,
		 * zero out the dying block pointers and send it off.
		 */
		bap = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK);
		memcpy(bap, bp->b_data, lfs_sb_getbsize(fs));
		bap_needs_free = true;

		for (i = last + 1; i < LFS_NINDIR(fs); i++) {
			lfs_iblock_set(fs, bp->b_data, i, 0);
		}
		error = VOP_BWRITE(bp->b_vp, bp);
		if (error)
			allerror = error;
	} else {
		bap_needs_free = false;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	     i--, nlbn += factor) {
		nb = lfs_iblock_get(fs, bap, i);
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       (daddr_t)-1, level - 1,
					       &blkcount, &rblkcount,
					       lastsegp, bcp);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
			real_released += rblkcount;
		}
		lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp);
		if (lfs_iblock_get(fs, bap, i) > 0)
			real_released += nblocks;
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = lfs_iblock_get(fs, bap, i);
		if (nb != 0) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       last, level - 1, &blkcount,
					       &rblkcount, lastsegp, bcp);
			if (error)
				allerror = error;
			real_released += rblkcount;
			blocksreleased += blkcount;
		}
	}

	if (bap_needs_free) {
		lfs_free(fs, bap, LFS_NB_IBLOCK);
	} else {
		mutex_enter(&bufcache_lock);
		if (bp->b_oflags & BO_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
			wakeup(&fs->lfs_availsleep);
		}
		brelsel(bp, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}

	*countp = blocksreleased;
	*rcountp = real_released;
	return (allerror);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Inlined from vtruncbuf, so that lfs_avail could be updated.
 * We take the seglock to prevent cleaning from occurring while we are
 * invalidating blocks.
 */
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	struct lfs *fs;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
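
	/*
	 * Editorial example, illustrative numbers only: mnt_fs_bshift is
	 * log2 of the file system block size, so with 8 KB blocks
	 * (bshift 13) and lbn 3, off becomes round_page(3 << 13) == 24576
	 * on a machine with 4 KB pages.  Every page from the first
	 * invalidated block onward is then freed by the VOP_PUTPAGES()
	 * call below.
	 */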
	mutex_enter(vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error)
		return error;

	fs = VTOI(vp)->i_lfs;

	ASSERT_SEGLOCK(fs);

	mutex_enter(&bufcache_lock);
restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error == EPASSTHROUGH)
			goto restart;
		if (error != 0) {
			mutex_exit(&bufcache_lock);
			return (error);
		}
		mutex_enter(bp->b_objlock);
		if (bp->b_oflags & BO_DELWRI) {
			bp->b_oflags &= ~BO_DELWRI;
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
			wakeup(&fs->lfs_availsleep);
		}
		mutex_exit(bp->b_objlock);
		LFS_UNLOCK_BUF(bp);
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error == EPASSTHROUGH)
			goto restart;
		if (error != 0) {
			mutex_exit(&bufcache_lock);
			return (error);
		}
		mutex_enter(bp->b_objlock);
		if (bp->b_oflags & BO_DELWRI) {
			bp->b_oflags &= ~BO_DELWRI;
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
			wakeup(&fs->lfs_availsleep);
		}
		mutex_exit(bp->b_objlock);
		LFS_UNLOCK_BUF(bp);
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	mutex_exit(&bufcache_lock);

	return (0);
}