1 /* $NetBSD: lfs_inode.c,v 1.119 2008/03/27 19:06:52 ad Exp $ */
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 * Copyright (c) 1986, 1989, 1991, 1993
33 * The Regents of the University of California. All rights reserved.
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_inode.c,v 1.119 2008/03/27 19:06:52 ad Exp $");
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/mount.h>
72 #include <sys/malloc.h>
76 #include <sys/vnode.h>
77 #include <sys/kernel.h>
78 #include <sys/trace.h>
79 #include <sys/resourcevar.h>
80 #include <sys/kauth.h>
82 #include <ufs/ufs/quota.h>
83 #include <ufs/ufs/inode.h>
84 #include <ufs/ufs/ufsmount.h>
85 #include <ufs/ufs/ufs_extern.h>
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_extern.h>
/*
 * Forward declarations for the file-local (static) helpers defined below.
 * NOTE(review): this copy of the file was damaged in extraction -- logical
 * source lines are split across physical lines and some lines are missing
 * entirely.  Verify every edit against pristine sys/ufs/lfs/lfs_inode.c
 * (the RCS id above says rev 1.119) before changing anything here.
 */
90 static int lfs_update_seguse(struct lfs
*, struct inode
*ip
, long, size_t);
91 static int lfs_indirtrunc (struct inode
*, daddr_t
, daddr_t
,
92 daddr_t
, int, long *, long *, long *, size_t *);
93 static int lfs_blkfree (struct lfs
*, struct inode
*, daddr_t
, size_t, long *, size_t *);
94 static int lfs_vtruncbuf(struct vnode
*, daddr_t
, bool, int);
/*
 * lfs_ifind: locate the on-disk dinode with inode number "ino" inside the
 * inode block held in buffer "bp".  The dinode array is scanned backwards
 * (from fin-1 down to dip) because later copies of an inode supersede
 * earlier ones in the same block (see the original comment fragments below).
 * The printf()s visible at the end are the diagnostic output for the
 * not-found case.
 * NOTE(review): extraction dropped lines here (the function's return type,
 * braces, the "return ldip" / panic path); do not edit without the
 * pristine file.
 */
96 /* Search a block for a specific dinode. */
98 lfs_ifind(struct lfs
*fs
, ino_t ino
, struct buf
*bp
)
100 struct ufs1_dinode
*dip
= (struct ufs1_dinode
*)bp
->b_data
;
101 struct ufs1_dinode
*ldip
, *fin
;
103 ASSERT_NO_SEGLOCK(fs
);
105 * Read the inode block backwards, since later versions of the
106 * inode will supercede earlier ones. Though it is unlikely, it is
107 * possible that the same inode will appear in the same inode block.
109 fin
= dip
+ INOPB(fs
);
110 for (ldip
= fin
- 1; ldip
>= dip
; --ldip
)
111 if (ldip
->di_inumber
== ino
)
114 printf("searched %d entries\n", (int)(fin
- dip
));
115 printf("offset is 0x%x (seg %d)\n", fs
->lfs_offset
,
116 dtosn(fs
, fs
->lfs_offset
));
117 printf("block is 0x%llx (seg %lld)\n",
118 (unsigned long long)dbtofsb(fs
, bp
->b_blkno
),
119 (long long)dtosn(fs
, dbtofsb(fs
, bp
->b_blkno
)));
/*
 * lfs_update: update the inode's times via LFS_ITIMES(), then, when called
 * with UPDATE_WAIT (and not UPDATE_DIROP), synchronously flush the vnode
 * with lfs_vflush().  Before flushing it (a) waits on v_cv under
 * v_interlock for any write in progress on this inode, and (b) under
 * lfs_lock waits for dirops to drain (sleeping on lfs_writer, kicking
 * lfs_flush_fs when lfs_dirops reaches 0) so that a VU_DIROP vnode is not
 * flushed.  Read-only mounts (MNT_RDONLY) are handled early.
 * NOTE(review): extraction dropped lines here -- the declarations of "ip"
 * and "flags", the return type, several condition tails and closing
 * braces.  Consult the pristine file; treat this copy as read-only.
 */
125 lfs_update(struct vnode
*vp
, const struct timespec
*acc
,
126 const struct timespec
*mod
, int updflags
)
129 struct lfs
*fs
= VFSTOUFS(vp
->v_mount
)->um_lfs
;
132 ASSERT_NO_SEGLOCK(fs
);
133 if (vp
->v_mount
->mnt_flag
& MNT_RDONLY
)
138 * If we are called from vinvalbuf, and the file's blocks have
139 * already been scheduled for writing, but the writes have not
140 * yet completed, lfs_vflush will not be called, and vinvalbuf
141 * will cause a panic. So, we must wait until any pending write
142 * for our inode completes, if we are called with UPDATE_WAIT set.
144 mutex_enter(&vp
->v_interlock
);
145 while ((updflags
& (UPDATE_WAIT
|UPDATE_DIROP
)) == UPDATE_WAIT
&&
147 DLOG((DLOG_SEG
, "lfs_update: sleeping on ino %d"
148 " (in progress)\n", ip
->i_number
));
149 cv_wait(&vp
->v_cv
, &vp
->v_interlock
);
151 mutex_exit(&vp
->v_interlock
);
152 LFS_ITIMES(ip
, acc
, mod
, NULL
);
153 if (updflags
& UPDATE_CLOSE
)
154 flags
= ip
->i_flag
& (IN_MODIFIED
| IN_ACCESSED
| IN_CLEANING
);
156 flags
= ip
->i_flag
& (IN_MODIFIED
| IN_CLEANING
);
160 /* If sync, push back the vnode and any dirty blocks it may have. */
161 if ((updflags
& (UPDATE_WAIT
|UPDATE_DIROP
)) == UPDATE_WAIT
) {
162 /* Avoid flushing VU_DIROP. */
163 mutex_enter(&lfs_lock
);
165 while (vp
->v_uflag
& VU_DIROP
) {
166 DLOG((DLOG_DIROP
, "lfs_update: sleeping on inode %d"
167 " (dirops)\n", ip
->i_number
));
168 DLOG((DLOG_DIROP
, "lfs_update: vflags 0x%x, iflags"
170 vp
->v_iflag
| vp
->v_vflag
| vp
->v_uflag
,
172 if (fs
->lfs_dirops
== 0)
173 lfs_flush_fs(fs
, SEGM_SYNC
);
175 mtsleep(&fs
->lfs_writer
, PRIBIO
+1, "lfs_fsync",
177 /* XXX KS - by falling out here, are we writing the vn
181 mutex_exit(&lfs_lock
);
182 return lfs_vflush(vp
);
/*
 * Indices into the inode's indirect-block arrays (i_ffs1_ib[], lastiblock[],
 * indir_lbn[]) used by lfs_truncate() and lfs_indirtrunc() below.
 */
187 #define SINGLE 0 /* index of single indirect block */
188 #define DOUBLE 1 /* index of double indirect block */
189 #define TRIPLE 2 /* index of triple indirect block */
/*
 * lfs_truncate: set inode oip's size to "length", allocating or freeing
 * blocks as required.  Visible structure:
 *   - short-symlink case: zero SHORTLINK storage, set size 0, lfs_update;
 *   - no-op case (i_size == length): mark IN_CHANGE|IN_UPDATE, lfs_update;
 *   - extension (osize < length): allocate the last byte via
 *     ufs_balloc_range()/lfs_balloc() (page-cache vs. buffer path chosen
 *     by "usepc"), bracketed by lfs_reserve();
 *   - shrink: zero the partial tail block (memset/uvm_vnp_zerorange,
 *     VOP_PUTPAGES for the sync case), take the seglock, invalidate
 *     buffers past the new end with lfs_vtruncbuf(), free indirect chains
 *     with lfs_indirtrunc() and direct blocks with lfs_blkfree()/
 *     lfs_deregister_block(), finish per-segment byte accounting with
 *     lfs_update_seguse(), then update i_lfs_effnblks/i_ffs1_blocks/
 *     lfs_bfree and quota (chkdq), and recompute i_lfs_hiblk.
 * NOTE(review): a large number of physical lines are missing from this
 * extraction (local declarations such as fs/osize/usepc/lastseg/bc,
 * braces, "goto errout" paths, the DIAGNOSTIC #ifdefs' opening lines, the
 * lfs_segunlock() tail).  Nothing below should be edited against this
 * copy; use the pristine rev 1.119 file.
 */
191 * Truncate the inode oip to at most length size, freeing the
194 /* VOP_BWRITE 1 + NIADDR + lfs_balloc == 2 + 2*NIADDR times */
197 lfs_truncate(struct vnode
*ovp
, off_t length
, int ioflag
, kauth_cred_t cred
)
200 struct inode
*oip
= VTOI(ovp
);
201 daddr_t bn
, lbn
, lastiblock
[NIADDR
], indir_lbn
[NIADDR
];
203 int32_t newblks
[NDADDR
+ NIADDR
];
206 int offset
, size
, level
;
207 long count
, rcount
, blocksreleased
= 0, real_released
= 0;
209 int aflags
, error
, allerror
= 0;
215 struct ufsmount
*ump
= oip
->i_ump
;
217 if (ovp
->v_type
== VCHR
|| ovp
->v_type
== VBLK
||
218 ovp
->v_type
== VFIFO
|| ovp
->v_type
== VSOCK
) {
219 KASSERT(oip
->i_size
== 0);
227 * Just return and not update modification times.
229 if (oip
->i_size
== length
)
232 if (ovp
->v_type
== VLNK
&&
233 (oip
->i_size
< ump
->um_maxsymlinklen
||
234 (ump
->um_maxsymlinklen
== 0 &&
235 oip
->i_ffs1_blocks
== 0))) {
238 panic("lfs_truncate: partial truncate of symlink");
240 memset((char *)SHORTLINK(oip
), 0, (u_int
)oip
->i_size
);
241 oip
->i_size
= oip
->i_ffs1_size
= 0;
242 oip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
243 return (lfs_update(ovp
, NULL
, NULL
, 0));
245 if (oip
->i_size
== length
) {
246 oip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
247 return (lfs_update(ovp
, NULL
, NULL
, 0));
252 usepc
= (ovp
->v_type
== VREG
&& ovp
!= fs
->lfs_ivnode
);
254 ASSERT_NO_SEGLOCK(fs
);
256 * Lengthen the size of the file. We must ensure that the
257 * last byte of the file is allocated. Since the smallest
258 * value of osize is 0, length will be at least 1.
260 if (osize
< length
) {
261 if (length
> ump
->um_maxfilesize
)
264 if (ioflag
& IO_SYNC
)
267 if (lblkno(fs
, osize
) < NDADDR
&&
268 lblkno(fs
, osize
) != lblkno(fs
, length
) &&
269 blkroundup(fs
, osize
) != osize
) {
272 eob
= blkroundup(fs
, osize
);
273 uvm_vnp_setwritesize(ovp
, eob
);
274 error
= ufs_balloc_range(ovp
, osize
,
275 eob
- osize
, cred
, aflags
);
278 if (ioflag
& IO_SYNC
) {
279 mutex_enter(&ovp
->v_interlock
);
281 trunc_page(osize
& fs
->lfs_bmask
),
283 PGO_CLEANIT
| PGO_SYNCIO
);
286 uvm_vnp_setwritesize(ovp
, length
);
287 error
= ufs_balloc_range(ovp
, length
- 1, 1, cred
,
290 (void) lfs_truncate(ovp
, osize
,
291 ioflag
& IO_SYNC
, cred
);
294 uvm_vnp_setsize(ovp
, length
);
295 oip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
296 KASSERT(ovp
->v_size
== oip
->i_size
);
297 oip
->i_lfs_hiblk
= lblkno(fs
, oip
->i_size
+ fs
->lfs_bsize
- 1) - 1;
298 return (lfs_update(ovp
, NULL
, NULL
, 0));
300 error
= lfs_reserve(fs
, ovp
, NULL
,
301 btofsb(fs
, (NIADDR
+ 2) << fs
->lfs_bshift
));
304 error
= lfs_balloc(ovp
, length
- 1, 1, cred
,
306 lfs_reserve(fs
, ovp
, NULL
,
307 -btofsb(fs
, (NIADDR
+ 2) << fs
->lfs_bshift
));
310 oip
->i_ffs1_size
= oip
->i_size
= length
;
311 uvm_vnp_setsize(ovp
, length
);
312 (void) VOP_BWRITE(bp
);
313 oip
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
314 oip
->i_lfs_hiblk
= lblkno(fs
, oip
->i_size
+ fs
->lfs_bsize
- 1) - 1;
315 return (lfs_update(ovp
, NULL
, NULL
, 0));
319 if ((error
= lfs_reserve(fs
, ovp
, NULL
,
320 btofsb(fs
, (2 * NIADDR
+ 3) << fs
->lfs_bshift
))) != 0)
324 * Shorten the size of the file. If the file is not being
325 * truncated to a block boundary, the contents of the
326 * partial block following the end of the file must be
327 * zero'ed in case it ever becomes accessible again because
328 * of subsequent file growth. Directories however are not
329 * zero'ed as they should grow back initialized to empty.
331 offset
= blkoff(fs
, length
);
335 if (ovp
!= fs
->lfs_ivnode
)
336 lfs_seglock(fs
, SEGM_PROT
);
338 oip
->i_size
= oip
->i_ffs1_size
= length
;
340 lbn
= lblkno(fs
, length
);
342 if (ioflag
& IO_SYNC
)
344 error
= lfs_balloc(ovp
, length
- 1, 1, cred
, aflags
, &bp
);
346 lfs_reserve(fs
, ovp
, NULL
,
347 -btofsb(fs
, (2 * NIADDR
+ 3) << fs
->lfs_bshift
));
350 obufsize
= bp
->b_bufsize
;
351 odb
= btofsb(fs
, bp
->b_bcount
);
352 oip
->i_size
= oip
->i_ffs1_size
= length
;
353 size
= blksize(fs
, oip
, lbn
);
354 if (ovp
->v_type
!= VDIR
)
355 memset((char *)bp
->b_data
+ offset
, 0,
356 (u_int
)(size
- offset
));
357 allocbuf(bp
, size
, 1);
358 if ((bp
->b_flags
& B_LOCKED
) != 0 && bp
->b_iodone
== NULL
) {
359 mutex_enter(&lfs_lock
);
360 locked_queue_bytes
-= obufsize
- bp
->b_bufsize
;
361 mutex_exit(&lfs_lock
);
363 if (bp
->b_oflags
& BO_DELWRI
)
364 fs
->lfs_avail
+= odb
- btofsb(fs
, size
);
365 (void) VOP_BWRITE(bp
);
366 } else { /* vp->v_type == VREG && length < osize && offset != 0 */
368 * When truncating a regular file down to a non-block-aligned
369 * size, we must zero the part of last block which is past
370 * the new EOF. We must synchronously flush the zeroed pages
371 * to disk since the new pages will be invalidated as soon
372 * as we inform the VM system of the new, smaller size.
373 * We must do this before acquiring the GLOCK, since fetching
374 * the pages will acquire the GLOCK internally.
375 * So there is a window where another thread could see a whole
376 * zeroed page past EOF, but that's life.
381 aflags
= ioflag
& IO_SYNC
? B_SYNC
: 0;
382 error
= ufs_balloc_range(ovp
, length
- 1, 1, cred
, aflags
);
384 lfs_reserve(fs
, ovp
, NULL
,
385 -btofsb(fs
, (2 * NIADDR
+ 3) << fs
->lfs_bshift
));
388 xlbn
= lblkno(fs
, length
);
389 size
= blksize(fs
, oip
, xlbn
);
390 eoz
= MIN(lblktosize(fs
, xlbn
) + size
, osize
);
391 uvm_vnp_zerorange(ovp
, length
, eoz
- length
);
392 if (round_page(eoz
) > round_page(length
)) {
393 mutex_enter(&ovp
->v_interlock
);
394 error
= VOP_PUTPAGES(ovp
, round_page(length
),
396 PGO_CLEANIT
| PGO_DEACTIVATE
|
397 ((ioflag
& IO_SYNC
) ? PGO_SYNCIO
: 0));
399 lfs_reserve(fs
, ovp
, NULL
,
400 -btofsb(fs
, (2 * NIADDR
+ 3) << fs
->lfs_bshift
));
406 genfs_node_wrlock(ovp
);
408 oip
->i_size
= oip
->i_ffs1_size
= length
;
409 uvm_vnp_setsize(ovp
, length
);
411 * Calculate index into inode's block list of
412 * last direct and indirect blocks (if any)
413 * which we want to keep. Lastblock is -1 when
414 * the file is truncated to 0.
416 /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
417 if (length
> QUAD_MAX
- fs
->lfs_bsize
)
418 lastblock
= lblkno(fs
, QUAD_MAX
- fs
->lfs_bsize
);
420 lastblock
= lblkno(fs
, length
+ fs
->lfs_bsize
- 1) - 1;
421 lastiblock
[SINGLE
] = lastblock
- NDADDR
;
422 lastiblock
[DOUBLE
] = lastiblock
[SINGLE
] - NINDIR(fs
);
423 lastiblock
[TRIPLE
] = lastiblock
[DOUBLE
] - NINDIR(fs
) * NINDIR(fs
);
424 nblocks
= btofsb(fs
, fs
->lfs_bsize
);
426 * Record changed file and block pointers before we start
427 * freeing blocks. lastiblock values are also normalized to -1
428 * for calls to lfs_indirtrunc below.
430 memcpy((void *)newblks
, (void *)&oip
->i_ffs1_db
[0], sizeof newblks
);
431 for (level
= TRIPLE
; level
>= SINGLE
; level
--)
432 if (lastiblock
[level
] < 0) {
433 newblks
[NDADDR
+level
] = 0;
434 lastiblock
[level
] = -1;
436 for (i
= NDADDR
- 1; i
> lastblock
; i
--)
439 oip
->i_size
= oip
->i_ffs1_size
= osize
;
440 error
= lfs_vtruncbuf(ovp
, lastblock
+ 1, false, 0);
441 if (error
&& !allerror
)
445 * Indirect blocks first.
447 indir_lbn
[SINGLE
] = -NDADDR
;
448 indir_lbn
[DOUBLE
] = indir_lbn
[SINGLE
] - NINDIR(fs
) - 1;
449 indir_lbn
[TRIPLE
] = indir_lbn
[DOUBLE
] - NINDIR(fs
) * NINDIR(fs
) - 1;
450 for (level
= TRIPLE
; level
>= SINGLE
; level
--) {
451 bn
= oip
->i_ffs1_ib
[level
];
453 error
= lfs_indirtrunc(oip
, indir_lbn
[level
],
454 bn
, lastiblock
[level
],
455 level
, &count
, &rcount
,
459 real_released
+= rcount
;
460 blocksreleased
+= count
;
461 if (lastiblock
[level
] < 0) {
462 if (oip
->i_ffs1_ib
[level
] > 0)
463 real_released
+= nblocks
;
464 blocksreleased
+= nblocks
;
465 oip
->i_ffs1_ib
[level
] = 0;
466 lfs_blkfree(fs
, oip
, bn
, fs
->lfs_bsize
,
468 lfs_deregister_block(ovp
, bn
);
471 if (lastiblock
[level
] >= 0)
476 * All whole direct blocks or frags.
478 for (i
= NDADDR
- 1; i
> lastblock
; i
--) {
481 bn
= oip
->i_ffs1_db
[i
];
484 bsize
= blksize(fs
, oip
, i
);
485 if (oip
->i_ffs1_db
[i
] > 0) {
486 /* Check for fragment size changes */
487 obsize
= oip
->i_lfs_fragsize
[i
];
488 real_released
+= btofsb(fs
, obsize
);
489 oip
->i_lfs_fragsize
[i
] = 0;
492 blocksreleased
+= btofsb(fs
, bsize
);
493 oip
->i_ffs1_db
[i
] = 0;
494 lfs_blkfree(fs
, oip
, bn
, obsize
, &lastseg
, &bc
);
495 lfs_deregister_block(ovp
, bn
);
501 * Finally, look for a change in size of the
502 * last direct block; release any frags.
504 bn
= oip
->i_ffs1_db
[lastblock
];
506 long oldspace
, newspace
;
512 * Calculate amount of space we're giving
513 * back as old block size minus new block size.
515 oldspace
= blksize(fs
, oip
, lastblock
);
517 olddspace
= oip
->i_lfs_fragsize
[lastblock
];
520 oip
->i_size
= oip
->i_ffs1_size
= length
;
521 newspace
= blksize(fs
, oip
, lastblock
);
523 panic("itrunc: newspace");
524 if (oldspace
- newspace
> 0) {
525 blocksreleased
+= btofsb(fs
, oldspace
- newspace
);
528 if (bn
> 0 && olddspace
- newspace
> 0) {
529 /* No segment accounting here, just vnode */
530 real_released
+= btofsb(fs
, olddspace
- newspace
);
536 /* Finish segment accounting corrections */
537 lfs_update_seguse(fs
, oip
, lastseg
, bc
);
539 for (level
= SINGLE
; level
<= TRIPLE
; level
++)
540 if ((newblks
[NDADDR
+ level
] == 0) !=
541 ((oip
->i_ffs1_ib
[level
]) == 0)) {
542 panic("lfs itrunc1");
544 for (i
= 0; i
< NDADDR
; i
++)
545 if ((newblks
[i
] == 0) != (oip
->i_ffs1_db
[i
] == 0)) {
546 panic("lfs itrunc2");
549 (!LIST_EMPTY(&ovp
->v_cleanblkhd
) || !LIST_EMPTY(&ovp
->v_dirtyblkhd
)))
550 panic("lfs itrunc3");
551 #endif /* DIAGNOSTIC */
553 * Put back the real size.
555 oip
->i_size
= oip
->i_ffs1_size
= length
;
556 oip
->i_lfs_effnblks
-= blocksreleased
;
557 oip
->i_ffs1_blocks
-= real_released
;
558 mutex_enter(&lfs_lock
);
559 fs
->lfs_bfree
+= blocksreleased
;
560 mutex_exit(&lfs_lock
);
562 if (oip
->i_size
== 0 &&
563 (oip
->i_ffs1_blocks
!= 0 || oip
->i_lfs_effnblks
!= 0)) {
564 printf("lfs_truncate: truncate to 0 but %d blks/%d effblks\n",
565 oip
->i_ffs1_blocks
, oip
->i_lfs_effnblks
);
566 panic("lfs_truncate: persistent blocks");
571 * If we truncated to zero, take us off the paging queue.
573 mutex_enter(&lfs_lock
);
574 if (oip
->i_size
== 0 && oip
->i_flags
& IN_PAGING
) {
575 oip
->i_flags
&= ~IN_PAGING
;
576 TAILQ_REMOVE(&fs
->lfs_pchainhd
, oip
, i_lfs_pchain
);
578 mutex_exit(&lfs_lock
);
580 oip
->i_flag
|= IN_CHANGE
;
582 (void) chkdq(oip
, -blocksreleased
, NOCRED
, 0);
584 lfs_reserve(fs
, ovp
, NULL
,
585 -btofsb(fs
, (2 * NIADDR
+ 3) << fs
->lfs_bshift
));
586 genfs_node_unlock(ovp
);
588 oip
->i_lfs_hiblk
= lblkno(fs
, oip
->i_size
+ fs
->lfs_bsize
- 1) - 1;
589 if (ovp
!= fs
->lfs_ivnode
)
591 return (allerror
? allerror
: error
);
/*
 * lfs_blkfree: per-block segment accounting when a block is released.
 * Rounds bsize up to a fragment boundary; when the freed block's segment
 * (dtosn(fs, daddr)) differs from *lastseg, the bytes accumulated so far
 * in *num are flushed to the previous segment via lfs_update_seguse().
 * NOTE(review): extraction dropped lines (return type, locals "seg" and
 * "error", the daddr <= 0 early-out, the *num accumulation tail); verify
 * against the pristine file.
 */
594 /* Update segment and avail usage information when removing a block. */
596 lfs_blkfree(struct lfs
*fs
, struct inode
*ip
, daddr_t daddr
,
597 size_t bsize
, long *lastseg
, size_t *num
)
603 bsize
= fragroundup(fs
, bsize
);
605 if (*lastseg
!= (seg
= dtosn(fs
, daddr
))) {
606 error
= lfs_update_seguse(fs
, ip
, *lastseg
, *num
);
/*
 * lfs_update_seguse: record "num" bytes pending removal from segment
 * "lastseg" on the inode's per-inode segdelta list (i_lfs_segdhd).  An
 * existing segdelta entry for the segment is reused; otherwise one is
 * malloc'd (M_SEGMENT, M_WAITOK) and inserted at the head.  No-op when
 * lastseg < 0 or num == 0.
 * NOTE(review): extraction dropped lines (return type, the "sd" local
 * declaration, the sd->num accumulation, the return); see pristine file.
 */
616 /* Finish the accounting updates for a segment. */
618 lfs_update_seguse(struct lfs
*fs
, struct inode
*ip
, long lastseg
, size_t num
)
624 if (lastseg
< 0 || num
== 0)
628 LIST_FOREACH(sd
, &ip
->i_lfs_segdhd
, list
)
629 if (sd
->segnum
== lastseg
)
632 sd
= malloc(sizeof(*sd
), M_SEGMENT
, M_WAITOK
);
633 sd
->segnum
= lastseg
;
635 LIST_INSERT_HEAD(&ip
->i_lfs_segdhd
, sd
, list
);
/*
 * lfs_finalize_seguse: drain a segdelta list "v", applying each entry's
 * byte count to the on-disk SEGUSE entry.  For each entry: remove it from
 * the list, look up its segment usage record (LFS_SEGENTRY), subtract
 * sd->num from su_nbytes (panicking if that would go negative, after
 * printing the shortfall), and write the record back
 * (LFS_WRITESEGENTRY).
 * NOTE(review): extraction dropped lines here (return type, locals
 * "sd"/"sup"/"bp", the free(sd, M_SEGMENT) call); see pristine file.
 */
643 lfs_finalize_seguse(struct lfs
*fs
, void *v
)
648 LIST_HEAD(, segdelta
) *hd
= v
;
651 while((sd
= LIST_FIRST(hd
)) != NULL
) {
652 LIST_REMOVE(sd
, list
);
653 LFS_SEGENTRY(sup
, fs
, sd
->segnum
, bp
);
654 if (sd
->num
> sup
->su_nbytes
) {
655 printf("lfs_finalize_seguse: segment %ld short by %ld\n",
656 sd
->segnum
, (long)(sd
->num
- sup
->su_nbytes
));
657 panic("lfs_finalize_seguse: negative bytes");
658 sup
->su_nbytes
= sd
->num
;
660 sup
->su_nbytes
-= sd
->num
;
661 LFS_WRITESEGENTRY(sup
, fs
, sd
->segnum
, bp
);
/*
 * lfs_finalize_ino_seguse: apply the pending segment-usage deltas queued
 * on inode "ip" (its i_lfs_segdhd list) via lfs_finalize_seguse().
 * NOTE(review): return type and braces are missing from this extraction.
 */
666 /* Finish the accounting updates for a segment. */
668 lfs_finalize_ino_seguse(struct lfs
*fs
, struct inode
*ip
)
671 lfs_finalize_seguse(fs
, &ip
->i_lfs_segdhd
);
/*
 * lfs_finalize_fs_seguse: apply the filesystem-wide pending segment-usage
 * deltas (fs->lfs_segdhd list) via lfs_finalize_seguse().
 * NOTE(review): return type and braces are missing from this extraction.
 */
674 /* Finish the accounting updates for a segment. */
676 lfs_finalize_fs_seguse(struct lfs
*fs
)
679 lfs_finalize_seguse(fs
, &fs
->lfs_segdhd
);
/*
 * lfs_indirtrunc: free the blocks referenced from the indirect block at
 * logical block "lbn" / disk address "dbn" of inode "ip", in LIFO order
 * up to (but not including) "lastbn"; recurses for deeper levels.  Reads
 * the indirect block via getblk()+VOP_STRATEGY (setting b_blkno by hand,
 * as the comment fragments below explain), zeroes the freed pointer slots
 * in place after saving a copy with lfs_malloc()/memcpy, frees wholly
 * unused children (recursing for level > SINGLE, then lfs_blkfree), then
 * recurses once more for the partial last block.  Releases the buffer
 * with brelsel(BC_INVAL), crediting lfs_avail for a delayed-write buffer,
 * and returns the released-block counts through *countp/*rcountp.
 * NOTE(review): extraction dropped lines (return type, locals vp/bp/i,
 * "factor = 1" and "last = lastbn" initialization, biowait/error checks,
 * several brace lines, the final return); see pristine file.
 */
683 * Release blocks associated with the inode ip and stored in the indirect
684 * block bn. Blocks are free'd in LIFO order up to (but not including)
685 * lastbn. If level is greater than SINGLE, the block is an indirect block
686 * and recursive calls to indirtrunc must be used to cleanse other indirect
689 * NB: triple indirect blocks are untested.
692 lfs_indirtrunc(struct inode
*ip
, daddr_t lbn
, daddr_t dbn
,
693 daddr_t lastbn
, int level
, long *countp
,
694 long *rcountp
, long *lastsegp
, size_t *bcp
)
698 struct lfs
*fs
= ip
->i_lfs
;
699 int32_t *bap
; /* XXX ondisk32 */
701 daddr_t nb
, nlbn
, last
;
702 int32_t *copy
= NULL
; /* XXX ondisk32 */
703 long blkcount
, rblkcount
, factor
;
704 int nblocks
, blocksreleased
= 0, real_released
= 0;
705 int error
= 0, allerror
= 0;
709 * Calculate index in current block of last
710 * block to be kept. -1 indicates the entire
711 * block so we need not calculate the index.
714 for (i
= SINGLE
; i
< level
; i
++)
715 factor
*= NINDIR(fs
);
719 nblocks
= btofsb(fs
, fs
->lfs_bsize
);
721 * Get buffer of block pointers, zero those entries corresponding
722 * to blocks to be free'd, and update on disk copy first. Since
723 * double(triple) indirect before single(double) indirect, calls
724 * to bmap on these blocks will fail. However, we already have
725 * the on disk address, so we have to set the b_blkno field
726 * explicitly instead of letting bread do everything for us.
729 bp
= getblk(vp
, lbn
, (int)fs
->lfs_bsize
, 0, 0);
730 if (bp
->b_oflags
& (BO_DONE
| BO_DELWRI
)) {
731 /* Braces must be here in case trace evaluates to nothing. */
732 trace(TR_BREADHIT
, pack(vp
, fs
->lfs_bsize
), lbn
);
734 trace(TR_BREADMISS
, pack(vp
, fs
->lfs_bsize
), lbn
);
735 curlwp
->l_ru
.ru_inblock
++; /* pay for read */
736 bp
->b_flags
|= B_READ
;
737 if (bp
->b_bcount
> bp
->b_bufsize
)
738 panic("lfs_indirtrunc: bad buffer size");
739 bp
->b_blkno
= fsbtodb(fs
, dbn
);
740 VOP_STRATEGY(vp
, bp
);
745 *countp
= *rcountp
= 0;
749 bap
= (int32_t *)bp
->b_data
; /* XXX ondisk32 */
751 copy
= (int32_t *)lfs_malloc(fs
, fs
->lfs_bsize
, LFS_NB_IBLOCK
);
752 memcpy((void *)copy
, (void *)bap
, (u_int
)fs
->lfs_bsize
);
753 memset((void *)&bap
[last
+ 1], 0,
755 (u_int
)(NINDIR(fs
) - (last
+ 1)) * sizeof (int32_t));
756 error
= VOP_BWRITE(bp
);
763 * Recursively free totally unused blocks.
765 for (i
= NINDIR(fs
) - 1, nlbn
= lbn
+ 1 - i
* factor
; i
> last
;
766 i
--, nlbn
+= factor
) {
770 if (level
> SINGLE
) {
771 error
= lfs_indirtrunc(ip
, nlbn
, nb
,
772 (daddr_t
)-1, level
- 1,
773 &blkcount
, &rblkcount
,
777 blocksreleased
+= blkcount
;
778 real_released
+= rblkcount
;
780 lfs_blkfree(fs
, ip
, nb
, fs
->lfs_bsize
, lastsegp
, bcp
);
782 real_released
+= nblocks
;
783 blocksreleased
+= nblocks
;
787 * Recursively free last partial block.
789 if (level
> SINGLE
&& lastbn
>= 0) {
790 last
= lastbn
% factor
;
793 error
= lfs_indirtrunc(ip
, nlbn
, nb
,
794 last
, level
- 1, &blkcount
,
795 &rblkcount
, lastsegp
, bcp
);
798 real_released
+= rblkcount
;
799 blocksreleased
+= blkcount
;
804 lfs_free(fs
, copy
, LFS_NB_IBLOCK
);
806 mutex_enter(&bufcache_lock
);
807 if (bp
->b_oflags
& BO_DELWRI
) {
809 fs
->lfs_avail
+= btofsb(fs
, bp
->b_bcount
);
810 wakeup(&fs
->lfs_avail
);
812 brelsel(bp
, BC_INVAL
);
813 mutex_exit(&bufcache_lock
);
816 *countp
= blocksreleased
;
817 *rcountp
= real_released
;
/*
 * lfs_vtruncbuf: invalidate all in-core buffers of "vp" at or past
 * logical block "lbn".  First flushes/frees the page cache past the
 * truncation point (VOP_PUTPAGES with PGO_FREE|PGO_SYNCIO), then, under
 * bufcache_lock, walks both v_cleanblkhd and v_dirtyblkhd: buffers below
 * lbn are skipped, the rest are claimed with bbusy() (restarting on
 * EPASSTHROUGH) and released with brelsel(BC_INVAL | BC_VFLUSH).  A
 * delayed-write buffer has BO_DELWRI cleared and its space credited back
 * to fs->lfs_avail (with a wakeup), since it will never be written.
 * NOTE(review): extraction dropped lines (return type, locals off/error/
 * fs, the "restart:" label and goto restart paths, error returns, closing
 * braces, the final "return 0"); see pristine file.
 */
822 * Destroy any in core blocks past the truncation length.
823 * Inlined from vtruncbuf, so that lfs_avail could be updated.
824 * We take the seglock to prevent cleaning from occurring while we are
825 * invalidating blocks.
828 lfs_vtruncbuf(struct vnode
*vp
, daddr_t lbn
, bool catch, int slptimeo
)
830 struct buf
*bp
, *nbp
;
835 off
= round_page((voff_t
)lbn
<< vp
->v_mount
->mnt_fs_bshift
);
836 mutex_enter(&vp
->v_interlock
);
837 error
= VOP_PUTPAGES(vp
, off
, 0, PGO_FREE
| PGO_SYNCIO
);
841 fs
= VTOI(vp
)->i_lfs
;
845 mutex_enter(&bufcache_lock
);
847 for (bp
= LIST_FIRST(&vp
->v_cleanblkhd
); bp
; bp
= nbp
) {
848 nbp
= LIST_NEXT(bp
, b_vnbufs
);
849 if (bp
->b_lblkno
< lbn
)
851 error
= bbusy(bp
, catch, slptimeo
, NULL
);
852 if (error
== EPASSTHROUGH
)
855 mutex_exit(&bufcache_lock
);
858 mutex_enter(bp
->b_objlock
);
859 if (bp
->b_oflags
& BO_DELWRI
) {
860 bp
->b_oflags
&= ~BO_DELWRI
;
861 fs
->lfs_avail
+= btofsb(fs
, bp
->b_bcount
);
862 wakeup(&fs
->lfs_avail
);
864 mutex_exit(bp
->b_objlock
);
866 brelsel(bp
, BC_INVAL
| BC_VFLUSH
);
869 for (bp
= LIST_FIRST(&vp
->v_dirtyblkhd
); bp
; bp
= nbp
) {
870 nbp
= LIST_NEXT(bp
, b_vnbufs
);
871 if (bp
->b_lblkno
< lbn
)
873 error
= bbusy(bp
, catch, slptimeo
, NULL
);
874 if (error
== EPASSTHROUGH
)
877 mutex_exit(&bufcache_lock
);
880 mutex_enter(bp
->b_objlock
);
881 if (bp
->b_oflags
& BO_DELWRI
) {
882 bp
->b_oflags
&= ~BO_DELWRI
;
883 fs
->lfs_avail
+= btofsb(fs
, bp
->b_bcount
);
884 wakeup(&fs
->lfs_avail
);
886 mutex_exit(bp
->b_objlock
);
888 brelsel(bp
, BC_INVAL
| BC_VFLUSH
);
890 mutex_exit(&bufcache_lock
);