/*	$NetBSD: lfs_inode.c,v 1.126 2011/11/23 19:42:10 bouyer Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_inode.c	8.9 (Berkeley) 5/8/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.126 2011/11/23 19:42:10 bouyer Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/trace.h>
#include <sys/resourcevar.h>
#include <sys/kauth.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
static int lfs_update_seguse(struct lfs *, struct inode *ip, long, size_t);
static int lfs_indirtrunc(struct inode *, daddr_t, daddr_t,
			  daddr_t, int, long *, long *, long *, size_t *);
static int lfs_blkfree(struct lfs *, struct inode *, daddr_t, size_t,
		       long *, size_t *);
static int lfs_vtruncbuf(struct vnode *, daddr_t, bool, int);
/* Search a block for a specific dinode. */
struct ufs1_dinode *
lfs_ifind(struct lfs *fs, ino_t ino, struct buf *bp)
{
	struct ufs1_dinode *dip = (struct ufs1_dinode *)bp->b_data;
	struct ufs1_dinode *ldip, *fin;

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Read the inode block backwards, since later versions of the
	 * inode will supersede earlier ones.  Though it is unlikely, it is
	 * possible that the same inode will appear more than once in the
	 * same inode block.
	 */
	fin = dip + INOPB(fs);
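	/*
	 * INOPB(fs) is the number of dinodes in one inode block, so fin
	 * points just past the last entry; with 8 KB blocks and 128-byte
	 * ufs1 dinodes, for example, that is 64 entries.
	 */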
	for (ldip = fin - 1; ldip >= dip; --ldip)
		if (ldip->di_inumber == ino)
			return (ldip);

	printf("searched %d entries\n", (int)(fin - dip));
	printf("offset is 0x%x (seg %d)\n", fs->lfs_offset,
	       dtosn(fs, fs->lfs_offset));
	printf("block is 0x%llx (seg %lld)\n",
	       (unsigned long long)dbtofsb(fs, bp->b_blkno),
	       (long long)dtosn(fs, dbtofsb(fs, bp->b_blkno)));

	return NULL;
}
int
lfs_update(struct vnode *vp, const struct timespec *acc,
	   const struct timespec *mod, int updflags)
{
	struct inode *ip;
	struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;
	int flags;

	ASSERT_NO_SEGLOCK(fs);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);

	/*
	 * If we are called from vinvalbuf, and the file's blocks have
	 * already been scheduled for writing, but the writes have not
	 * yet completed, lfs_vflush will not be called, and vinvalbuf
	 * will cause a panic.  So, we must wait until any pending write
	 * for our inode completes, if we are called with UPDATE_WAIT set.
	 */
	mutex_enter(vp->v_interlock);
	while ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT &&
	    WRITEINPROG(vp)) {
		DLOG((DLOG_SEG, "lfs_update: sleeping on ino %d"
		      " (in progress)\n", ip->i_number));
		cv_wait(&vp->v_cv, vp->v_interlock);
	}
	mutex_exit(vp->v_interlock);
	LFS_ITIMES(ip, acc, mod, NULL);
	if (updflags & UPDATE_CLOSE)
		flags = ip->i_flag & (IN_MODIFIED | IN_ACCESSED | IN_CLEANING);
	else
		flags = ip->i_flag & (IN_MODIFIED | IN_CLEANING);
	if (flags == 0)
		return (0);
	/* If sync, push back the vnode and any dirty blocks it may have. */
	if ((updflags & (UPDATE_WAIT|UPDATE_DIROP)) == UPDATE_WAIT) {
		/* Avoid flushing VU_DIROP. */
		mutex_enter(&lfs_lock);
		++fs->lfs_diropwait;
		while (vp->v_uflag & VU_DIROP) {
			DLOG((DLOG_DIROP, "lfs_update: sleeping on inode %d"
			      " (dirops)\n", ip->i_number));
			DLOG((DLOG_DIROP, "lfs_update: vflags 0x%x, iflags"
			      " 0x%x\n",
			      vp->v_iflag | vp->v_vflag | vp->v_uflag,
			      ip->i_flag));
			if (fs->lfs_dirops == 0)
				lfs_flush_fs(fs, SEGM_SYNC);
			else
				mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync",
					0, &lfs_lock);
			/* XXX KS - by falling out here, are we writing the vn
			   twice? */
		}
		--fs->lfs_diropwait;
		mutex_exit(&lfs_lock);
		return lfs_vflush(vp);
	}
	return 0;
}
#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
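/*
 * For the common 8 KB block configuration, NDADDR is 12 and NINDIR(fs)
 * is 2048, so the single indirect block covers logical blocks 12-2059,
 * the double indirect the next 2048 * 2048 blocks, and so on.
 */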
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
/* VOP_BWRITE 1 + NIADDR + lfs_balloc == 2 + 2*NIADDR times */
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	/* XXX ondisk32 */
	int32_t newblks[NDADDR + NIADDR];
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, rcount, blocksreleased = 0, real_released = 0;
	int i, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;
	struct ufsmount *ump = oip->i_ump;

	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return and do not update modification times.
	 */
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		return (0);
	}

	if (ovp->v_type == VLNK &&
	    (oip->i_size < ump->um_maxsymlinklen ||
	     (ump->um_maxsymlinklen == 0 &&
	      oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = oip->i_ffs1_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	if (oip->i_size == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}

	fs = oip->i_lfs;
	osize = oip->i_size;
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > ump->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			if (lblkno(fs, osize) < NDADDR &&
			    lblkno(fs, osize) != lblkno(fs, length) &&
			    blkroundup(fs, osize) != osize) {
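				/*
				 * The old last block is a fragment that
				 * the new length promotes to a full block:
				 * allocate and zero-fill its tail so stale
				 * data can never become visible.
				 */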
				off_t eob;

				eob = blkroundup(fs, osize);
				uvm_vnp_setwritesize(ovp, eob);
				error = ufs_balloc_range(ovp, osize,
				    eob - osize, cred, aflags);
				if (error) {
					(void) lfs_truncate(ovp, osize,
					    ioflag & IO_SYNC, cred);
					return error;
				}
				if (ioflag & IO_SYNC) {
					mutex_enter(ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & fs->lfs_bmask),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			uvm_vnp_setwritesize(ovp, length);
			error = ufs_balloc_range(ovp, length - 1, 1, cred,
			    aflags);
			if (error) {
				(void) lfs_truncate(ovp, osize,
				    ioflag & IO_SYNC, cred);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		} else {
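			/*
			 * Reserve the worst case this allocation can
			 * dirty: the data block plus a full indirect
			 * chain (NIADDR + 2 file system blocks).  The
			 * reservation is dropped again right after the
			 * lfs_balloc() call.
			 */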
			error = lfs_reserve(fs, ovp, NULL,
			    btofsb(fs, (NIADDR + 2) << fs->lfs_bshift));
			if (error)
				return (error);
			error = lfs_balloc(ovp, length - 1, 1, cred,
			    aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (NIADDR + 2) << fs->lfs_bshift));
			if (error)
				return (error);
			oip->i_ffs1_size = oip->i_size = length;
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp->b_vp, bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		}
	}

	if ((error = lfs_reserve(fs, ovp, NULL,
	    btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift))) != 0)
		return (error);

	/*
	 * Shorten the size of the file.  If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.  Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
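	/*
	 * For example, with 8 KB blocks a truncate to length 10000 gives
	 * offset = 1808: bytes 1808 through 8191 of logical block 1 are
	 * the region that must be zeroed below.
	 */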
	offset = blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	if (ovp != fs->lfs_ivnode)
		lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		oip->i_size = oip->i_ffs1_size = length;
	} else if (!usepc) {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
			goto errout;
		}
		obufsize = bp->b_bufsize;
		odb = btofsb(fs, bp->b_bcount);
		oip->i_size = oip->i_ffs1_size = length;
		size = blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
			mutex_enter(&lfs_lock);
			locked_queue_bytes -= obufsize - bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
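		/*
		 * If the buffer is still dirty and just shrank, credit
		 * the size difference back to lfs_avail: that much less
		 * data will now reach the log.
		 */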
		if (bp->b_oflags & BO_DELWRI)
			fs->lfs_avail += odb - btofsb(fs, size);
		(void) VOP_BWRITE(bp->b_vp, bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of the last block which is past
		 * the new EOF.  We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t xlbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ufs_balloc_range(ovp, length - 1, 1, cred, aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
			goto errout;
		}
		xlbn = lblkno(fs, length);
		size = blksize(fs, oip, xlbn);
		eoz = MIN(lblktosize(fs, xlbn) + size, osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_UNMAP_FLAG(ovp));
		if (round_page(eoz) > round_page(length)) {
			mutex_enter(ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
				    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
				goto errout;
			}
		}
	}

	genfs_node_wrlock(ovp);

	oip->i_size = oip->i_ffs1_size = length;
	uvm_vnp_setsize(ovp, length);

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	/* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
	if (length > QUAD_MAX - fs->lfs_bsize)
		lastblock = lblkno(fs, QUAD_MAX - fs->lfs_bsize);
	else
		lastblock = lblkno(fs, length + fs->lfs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btofsb(fs, fs->lfs_bsize);
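	/*
	 * A truncate to zero, for example, yields lastblock = -1; every
	 * lastiblock[] entry is then negative and is normalized to -1
	 * below, so all direct and indirect blocks get released.
	 */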
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[NDADDR+level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	oip->i_size = oip->i_ffs1_size = osize;
	error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
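	/*
	 * Indirect blocks live at negative logical block numbers: the
	 * single indirect block is lbn -NDADDR and the double and triple
	 * ones sit below it, which is what lfs_indirtrunc() receives as
	 * its lbn argument below.
	 */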
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ffs1_ib[level];
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				if (oip->i_ffs1_ib[level] > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				oip->i_ffs1_ib[level] = 0;
				lfs_blkfree(fs, oip, bn, fs->lfs_bsize,
					    &lastseg, &bc);
				lfs_deregister_block(ovp, bn);
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = oip->i_ffs1_db[i];
		if (bn == 0)
			continue;
		bsize = blksize(fs, oip, i);
		if (oip->i_ffs1_db[i] > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += btofsb(fs, bsize);
		oip->i_ffs1_db[i] = 0;
		lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
		lfs_deregister_block(ovp, bn);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_ffs1_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;
#if 0
		long olddspace;
#endif

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
#if 0
		olddspace = oip->i_lfs_fragsize[lastblock];
#endif

		oip->i_size = oip->i_ffs1_size = length;
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += btofsb(fs, oldspace - newspace);
		}
#if 0
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += btofsb(fs, olddspace - newspace);
		}
#endif
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[NDADDR + level] == 0) !=
		    ((oip->i_ffs1_ib[level]) == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < NDADDR; i++)
		if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = oip->i_ffs1_size = length;
	oip->i_lfs_effnblks -= blocksreleased;
	oip->i_ffs1_blocks -= real_released;
	mutex_enter(&lfs_lock);
	fs->lfs_bfree += blocksreleased;
	mutex_exit(&lfs_lock);
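	/*
	 * The two decrements above differ on purpose: i_lfs_effnblks also
	 * counts blocks whose writes are still pending, while i_ffs1_blocks
	 * only reflects what has actually reached the disk, so they shrink
	 * by blocksreleased and real_released respectively.
	 */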
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %d blks/%d effblks\n",
		       oip->i_ffs1_blocks, oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif

	/*
	 * If we truncated to zero, take us off the paging queue.
	 */
	mutex_enter(&lfs_lock);
	if (oip->i_size == 0 && oip->i_flag & IN_PAGING) {
		oip->i_flag &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
	}
	mutex_exit(&lfs_lock);
	oip->i_flag |= IN_CHANGE;
#if defined(QUOTA) || defined(QUOTA2)
	(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -btofsb(fs, (2 * NIADDR + 3) << fs->lfs_bshift));
	genfs_node_unlock(ovp);
errout:
	oip->i_lfs_hiblk = lblkno(fs, oip->i_size + fs->lfs_bsize - 1) - 1;
	if (ovp != fs->lfs_ivnode)
		lfs_segunlock(fs);
	return (allerror ? allerror : error);
}

/* Update segment and avail usage information when removing a block. */
static int
lfs_blkfree(struct lfs *fs, struct inode *ip, daddr_t daddr,
	    size_t bsize, long *lastseg, size_t *num)
{
	long seg;
	int error = 0;

	ASSERT_SEGLOCK(fs);
	bsize = fragroundup(fs, bsize);
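	/*
	 * Frees are batched per segment: *num accumulates bytes freed
	 * from segment *lastseg, and the running total is handed to
	 * lfs_update_seguse() only when a block from a different
	 * segment shows up.
	 */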
	if (daddr > 0) {
		if (*lastseg != (seg = dtosn(fs, daddr))) {
			error = lfs_update_seguse(fs, ip, *lastseg, *num);
			*num = bsize;
			*lastseg = seg;
		} else
			*num += bsize;
	}

	return error;
}
/*
 * Record bytes freed from a segment so the segment usage table can be
 * corrected when the truncate finishes.
 */
static int
lfs_update_seguse(struct lfs *fs, struct inode *ip, long lastseg, size_t num)
{
	struct segdelta *sd;

	ASSERT_SEGLOCK(fs);
	if (lastseg < 0 || num == 0)
		return 0;

	LIST_FOREACH(sd, &ip->i_lfs_segdhd, list)
		if (sd->segnum == lastseg)
			break;
	if (sd == NULL) {
		sd = malloc(sizeof(*sd), M_SEGMENT, M_WAITOK);
		sd->segnum = lastseg;
		sd->num = 0;
		LIST_INSERT_HEAD(&ip->i_lfs_segdhd, sd, list);
	}
	sd->num += num;

	return 0;
}
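/*
 * Apply the deltas accumulated by lfs_update_seguse() to the segment
 * usage table, subtracting the freed bytes from each segment's
 * su_nbytes and writing the entries back.
 */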
static void
lfs_finalize_seguse(struct lfs *fs, void *v)
{
	SEGUSE *sup;
	struct buf *bp;
	struct segdelta *sd;
	LIST_HEAD(, segdelta) *hd = v;

	ASSERT_SEGLOCK(fs);
	while ((sd = LIST_FIRST(hd)) != NULL) {
		LIST_REMOVE(sd, list);
		LFS_SEGENTRY(sup, fs, sd->segnum, bp);
		if (sd->num > sup->su_nbytes) {
			printf("lfs_finalize_seguse: segment %ld short by %ld\n",
				sd->segnum, (long)(sd->num - sup->su_nbytes));
			panic("lfs_finalize_seguse: negative bytes");
			sup->su_nbytes = sd->num;
		}
		sup->su_nbytes -= sd->num;
		LFS_WRITESEGENTRY(sup, fs, sd->segnum, bp);
		free(sd, M_SEGMENT);
	}
}
/* Finish the accounting updates for this inode's segments. */
void
lfs_finalize_ino_seguse(struct lfs *fs, struct inode *ip)
{
	ASSERT_SEGLOCK(fs);
	lfs_finalize_seguse(fs, &ip->i_lfs_segdhd);
}
/* Finish the accounting updates for the file system's own segment list. */
void
lfs_finalize_fs_seguse(struct lfs *fs)
{
	ASSERT_SEGLOCK(fs);
	lfs_finalize_seguse(fs, &fs->lfs_segdhd);
}
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
	       daddr_t lastbn, int level, long *countp,
	       long *rcountp, long *lastsegp, size_t *bcp)
{
	int i;
	struct buf *bp;
	struct lfs *fs = ip->i_lfs;
	int32_t *bap;	/* XXX ondisk32 */
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	int32_t *copy = NULL;	/* XXX ondisk32 */
	long blkcount, rblkcount, factor;
	int nblocks, blocksreleased = 0, real_released = 0;
	int error = 0, allerror = 0;
	ASSERT_SEGLOCK(fs);
	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btofsb(fs, fs->lfs_bsize);
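	/*
	 * For a double indirect block, for instance, factor is NINDIR(fs):
	 * each pointer slot here spans NINDIR(fs) file blocks, so
	 * last = lastbn / factor selects the slot that contains lastbn.
	 */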
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update the on-disk copy first.  Since
	 * we free the double (triple) indirect block before the single
	 * (double) indirect ones, calls to bmap on these blocks will fail.
	 * However, we already have the on-disk address, so we have to set
	 * the b_blkno field explicitly instead of letting bread do
	 * everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, (int)fs->lfs_bsize, 0, 0);
	if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, fs->lfs_bsize), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, fs->lfs_bsize), lbn);
		curlwp->l_ru.ru_inblock++; /* pay for read */
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("lfs_indirtrunc: bad buffer size");
		bp->b_blkno = fsbtodb(fs, dbn);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp, 0);
		*countp = *rcountp = 0;
		return (error);
	}

	bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
	if (lastbn >= 0) {
		copy = (int32_t *)lfs_malloc(fs, fs->lfs_bsize, LFS_NB_IBLOCK);
		memcpy((void *)copy, (void *)bap, (u_int)fs->lfs_bsize);
		memset((void *)&bap[last + 1], 0,
		/* XXX ondisk32 */
		       (u_int)(NINDIR(fs) - (last + 1)) * sizeof (int32_t));
		error = VOP_BWRITE(bp->b_vp, bp);
		if (error)
			allerror = error;
		bap = copy;
	}
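	/*
	 * At this point the on-disk indirect block has been written with
	 * the freed slots zeroed, while bap points at the private copy
	 * that still holds the old pointers for the recursion below.
	 */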
	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       (daddr_t)-1, level - 1,
					       &blkcount, &rblkcount,
					       lastsegp, bcp);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
			real_released += rblkcount;
		}
		lfs_blkfree(fs, ip, nb, fs->lfs_bsize, lastsegp, bcp);
		if (bap[i] > 0)
			real_released += nblocks;
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[last];
		if (nb != 0) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       last, level - 1, &blkcount,
					       &rblkcount, lastsegp, bcp);
			if (error)
				allerror = error;
			real_released += rblkcount;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		lfs_free(fs, copy, LFS_NB_IBLOCK);
	} else {
		mutex_enter(&bufcache_lock);
		if (bp->b_oflags & BO_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		brelsel(bp, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}

	*countp = blocksreleased;
	*rcountp = real_released;
	return (allerror);
}
/*
 * Destroy any in core blocks past the truncation length.
 * Inlined from vtruncbuf, so that lfs_avail could be updated.
 * We take the seglock to prevent cleaning from occurring while we are
 * invalidating blocks.
 */
static int
lfs_vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
{
	struct buf *bp, *nbp;
	int error;
	struct lfs *fs;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	mutex_enter(vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error)
		return error;

	fs = VTOI(vp)->i_lfs;

	ASSERT_SEGLOCK(fs);

	mutex_enter(&bufcache_lock);
restart:
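	/*
	 * Invalidating a delayed-write buffer below means that write
	 * will never happen, so its space is credited back to lfs_avail;
	 * this is the reason vtruncbuf is inlined here at all.
	 */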
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error == EPASSTHROUGH)
			goto restart;
		if (error != 0) {
			mutex_exit(&bufcache_lock);
			return (error);
		}
		mutex_enter(bp->b_objlock);
		if (bp->b_oflags & BO_DELWRI) {
			bp->b_oflags &= ~BO_DELWRI;
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		mutex_exit(bp->b_objlock);
		LFS_UNLOCK_BUF(bp);
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		error = bbusy(bp, catch, slptimeo, NULL);
		if (error == EPASSTHROUGH)
			goto restart;
		if (error != 0) {
			mutex_exit(&bufcache_lock);
			return (error);
		}
		mutex_enter(bp->b_objlock);
		if (bp->b_oflags & BO_DELWRI) {
			bp->b_oflags &= ~BO_DELWRI;
			fs->lfs_avail += btofsb(fs, bp->b_bcount);
			wakeup(&fs->lfs_avail);
		}
		mutex_exit(bp->b_objlock);
		LFS_UNLOCK_BUF(bp);
		brelsel(bp, BC_INVAL | BC_VFLUSH);
	}

	mutex_exit(&bufcache_lock);

	return 0;
}