/*	$NetBSD: lfs_syscalls.c,v 1.170 2015/09/01 06:08:37 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2008
 *    The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1991, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_syscalls.c	8.10 (Berkeley) 5/14/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_syscalls.c,v 1.170 2015/09/01 06:08:37 dholland Exp $");

#ifndef LFS
# define LFS		/* for prototypes in syscallargs.h */
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/kauth.h>
#include <sys/syscallargs.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>
static int lfs_fastvget(struct mount *, ino_t, BLOCK_INFO *, int,
    struct vnode **);
static struct buf *lfs_fakebuf(struct lfs *, struct vnode *, daddr_t,
    size_t, void *);
/*
 * sys_lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
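/*
 * For orientation: the handler below is reached from the cleaner through
 * the lfs_markv(2) stub.  A cleaner-side call might look roughly like the
 * following sketch (illustrative only; it assumes a BLOCK_INFO array
 * already filled in from segment summaries, and omits the retry logic a
 * real cleaner needs):
 *
 *	struct block_info *bip;		// one entry per live block
 *	int nblocks;			// entries in bip
 *
 *	if (lfs_markv(&fsid, bip, nblocks) < 0)
 *		syslog(LOG_ERR, "lfs_markv: %m");
 */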
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_markv(l, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_markv(struct lwp *l, const struct sys_lfs_markv_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > LFS_MARKV_MAXBLKCNT)
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_markv(l, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif
#define	LFS_MARKV_MAX_BLOCKS	(LFS_MAX_BUFS)

int
lfs_markv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov,
    int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino, maxino;
	daddr_t b_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	int obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_MARKV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	maxino = (lfs_fragstoblks(fs, lfs_dino_getblocks(fs, VTOI(fs->lfs_ivnode)->i_din)) -
		      lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
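	/*
	 * The expression above follows from the Ifile layout: the first
	 * lfs_sb_getcleansz() blocks hold cleaner info and the next
	 * lfs_sb_getsegtabsz() blocks hold the segment usage table, so
	 * every remaining Ifile block is an inode-table block holding
	 * lfs_sb_getifpb() inode entries.  Any bi_inode at or beyond
	 * that product can have no IFILE entry and is rejected below.
	 */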
	cnt = blkcnt;

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	/*
	 * This seglock is just to prevent the fact that we might have to sleep
	 * from allowing the possibility that our blocks might become
	 * invalid.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);
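	/*
	 * Roughly, the flags mean: SEGM_CLEAN tags this as cleaner
	 * writing so the blocks are accounted to cleaning, SEGM_CKP
	 * makes the write a checkpoint (see the note above about Ifile
	 * blocks), and SEGM_SYNC makes the segment write synchronous.
	 * See lfs_segment.c for the authoritative semantics.
	 */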
	/* Mark blocks/inodes dirty.  */
	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}

		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, blkp,
			    LK_EXCLUSIVE | LK_NOWAIT, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
				      " failed with %d (ino %d, segment %d)\n",
				      error, blkp->bi_inode,
				      lfs_dtosn(fs, blkp->bi_daddr)));
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.  (When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.)
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				} else
					KASSERT(error == ENOENT);
				KASSERT(vp == NULL);
				ip = NULL;
				continue;
			}

			ip = VTOI(vp);
			numrefed++;
			ninowritten++;
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * inlocked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */
		/* Can't clean VU_DIROP directories in case of truncation */
		/* XXX - maybe we should mark removed dirs specially? */
		if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
			do_again++;
			continue;
		}

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (lfs_if_getdaddr(fs, ifp) == blkp->bi_daddr) {
					mutex_enter(&lfs_lock);
					LFS_SET_UINO(ip, IN_CLEANING);
					mutex_exit(&lfs_lock);
				}
				brelse(bp, 0);
			}
			continue;
		}
		b_daddr = 0;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr) {
			if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) ==
			    lfs_dtosn(fs, blkp->bi_daddr)) {
				DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %jx vs %jx\n",
				      (intmax_t)blkp->bi_daddr, (intmax_t)LFS_DBTOFSB(fs, b_daddr)));
			}
			do_again++;
			continue;
		}
		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = lfs_blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = lfs_sb_getbsize(fs);
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %jd wrong"
			      " size (%ld != %d), try again\n",
			      blkp->bi_inode, (intmax_t)blkp->bi_lbn,
			      (long) obsize, blkp->bi_size));
			do_again++;
			continue;
		}
		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.  If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != lfs_sb_getbsize(fs) &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				      " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.  So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lfs_lblkno(fs, ninowritten * DINOSIZE(fs))
		    > LFS_MARKV_MAX_BLOCKS) {
			DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
			      nblkwritten, ninowritten));
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}
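	/*
	 * About the flush test above: nblkwritten counts data blocks
	 * already redirtied, and lfs_lblkno(fs, ninowritten * DINOSIZE(fs))
	 * converts the accumulated inodes into their equivalent in
	 * filesystem blocks, so a partial segment is written before the
	 * redirtied data can outgrow LFS_MAX_BUFS buffers.
	 */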
	/*
	 * Finish the old file, if there was one
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);

	DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
	      nblkwritten, ninowritten));

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp, false, NULL);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	DLOG((DLOG_CLEAN, "lfs_markv err2\n"));

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd.  too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp, false, NULL);

	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);

	return (error);
}
/*
 * sys_lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
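/*
 * The cleaner uses this as its liveness test: it passes in the disk
 * addresses it found in a victim segment, and any block whose returned
 * bi_daddr still falls inside that segment is live and must go to
 * lfs_markv().  A cleaner-side call might look roughly like this
 * (illustrative only; assumes the lfs_bmapv(2) stub):
 *
 *	if (lfs_bmapv(&fsid, bip, nblocks) < 0)
 *		syslog(LOG_ERR, "lfs_bmapv: %m");
 */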
#ifdef USE_64BIT_SYSCALLS
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	int blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((u_int) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov,
			    blkcnt * sizeof(BLOCK_INFO))) != 0)
		goto out;

	if ((error = lfs_bmapv(l, &fsid, blkiov, blkcnt)) == 0)
		copyout(blkiov, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO));
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#else
int
sys_lfs_bmapv(struct lwp *l, const struct sys_lfs_bmapv_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct block_info *) blkiov;
		syscallarg(int) blkcnt;
	} */
	BLOCK_INFO *blkiov;
	BLOCK_INFO_15 *blkiov15;
	int i, blkcnt, error;
	fsid_t fsid;
	struct lfs *fs;
	struct mount *mntp;

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);
	fs = VFSTOULFS(mntp)->um_lfs;

	blkcnt = SCARG(uap, blkcnt);
	if ((size_t) blkcnt > SIZE_T_MAX / sizeof(BLOCK_INFO))
		return (EINVAL);

	KERNEL_LOCK(1, NULL);
	blkiov = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO), LFS_NB_BLKIOV);
	blkiov15 = lfs_malloc(fs, blkcnt * sizeof(BLOCK_INFO_15), LFS_NB_BLKIOV);
	if ((error = copyin(SCARG(uap, blkiov), blkiov15,
			    blkcnt * sizeof(BLOCK_INFO_15))) != 0)
		goto out;

	for (i = 0; i < blkcnt; i++) {
		blkiov[i].bi_inode = blkiov15[i].bi_inode;
		blkiov[i].bi_lbn = blkiov15[i].bi_lbn;
		blkiov[i].bi_daddr = blkiov15[i].bi_daddr;
		blkiov[i].bi_segcreate = blkiov15[i].bi_segcreate;
		blkiov[i].bi_version = blkiov15[i].bi_version;
		blkiov[i].bi_bp = blkiov15[i].bi_bp;
		blkiov[i].bi_size = blkiov15[i].bi_size;
	}

	if ((error = lfs_bmapv(l, &fsid, blkiov, blkcnt)) == 0) {
		for (i = 0; i < blkcnt; i++) {
			blkiov15[i].bi_inode = blkiov[i].bi_inode;
			blkiov15[i].bi_lbn = blkiov[i].bi_lbn;
			blkiov15[i].bi_daddr = blkiov[i].bi_daddr;
			blkiov15[i].bi_segcreate = blkiov[i].bi_segcreate;
			blkiov15[i].bi_version = blkiov[i].bi_version;
			blkiov15[i].bi_bp = blkiov[i].bi_bp;
			blkiov15[i].bi_size = blkiov[i].bi_size;
		}
		copyout(blkiov15, SCARG(uap, blkiov),
			blkcnt * sizeof(BLOCK_INFO_15));
	}
    out:
	lfs_free(fs, blkiov, LFS_NB_BLKIOV);
	lfs_free(fs, blkiov15, LFS_NB_BLKIOV);
	KERNEL_UNLOCK_ONE(NULL);
	return error;
}
#endif

int
lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOULFS(mntp);
	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	if (ump->um_cleaner_thread == NULL)
		ump->um_cleaner_thread = curlwp;
	KASSERT(ump->um_cleaner_thread == curlwp);

	fs = VFSTOULFS(mntp)->um_lfs;
	cnt = blkcnt;
	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = lfs_sb_getidaddr(fs);
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = lfs_if_getdaddr(fs, ifp);
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			error = lfs_fastvget(mntp, blkp->bi_inode, NULL,
			    LK_SHARED, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino"
				      "%d failed with %d",
				      blkp->bi_inode, error));
				KASSERT(vp == NULL);
				continue;
			} else {
				KASSERT(VOP_ISLOCKED(vp));
				numrefed++;
			}
			ip = VTOI(vp);
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.  Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = lfs_sb_getbsize(fs);
		}
	}

	/*
	 * Finish the old file, if there was one.
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);

	vfs_unbusy(mntp, false, NULL);

	return 0;
}
/*
 * sys_lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
sys_lfs_segclean(struct lwp *l, const struct sys_lfs_segclean_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(u_long) segment;
	} */
	struct lfs *fs;
	struct mount *mntp;
	fsid_t fsid;
	int error;
	unsigned long segnum;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_SEGCLEAN, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (ENOENT);

	fs = VFSTOULFS(mntp)->um_lfs;
	segnum = SCARG(uap, segment);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	KERNEL_LOCK(1, NULL);
	lfs_seglock(fs, SEGM_PROT);
	error = lfs_do_segclean(fs, segnum);
	lfs_segunlock(fs);
	KERNEL_UNLOCK_ONE(NULL);
	vfs_unbusy(mntp, false, NULL);
	return error;
}
/*
 * Actually mark the segment clean.
 * Must be called with the segment lock held.
 */
int
lfs_do_segclean(struct lfs *fs, unsigned long segnum)
{
	extern int lfs_dostats;
	struct buf *bp;
	CLEANERINFO *cip;
	SEGUSE *sup;

	if (lfs_dtosn(fs, lfs_sb_getcurseg(fs)) == segnum) {
		return (EBUSY);
	}

	LFS_SEGENTRY(sup, fs, segnum, bp);
	if (sup->su_nbytes) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " %d live bytes\n", segnum, sup->su_nbytes));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (sup->su_flags & SEGUSE_ACTIVE) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " segment is active\n", segnum));
		brelse(bp, 0);
		return (EBUSY);
	}
	if (!(sup->su_flags & SEGUSE_DIRTY)) {
		DLOG((DLOG_CLEAN, "lfs_segclean: not cleaning segment %lu:"
		      " segment is already clean\n", segnum));
		brelse(bp, 0);
		return (EALREADY);
	}
	lfs_sb_addavail(fs, lfs_segtod(fs, 1));
	if (sup->su_flags & SEGUSE_SUPERBLOCK)
		lfs_sb_subavail(fs, lfs_btofsb(fs, LFS_SBPAD));
	if (lfs_sb_getversion(fs) > 1 && segnum == 0 &&
	    lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD))
		lfs_sb_subavail(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs));
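	/*
	 * In other words: reclaiming the segment returns lfs_segtod(fs, 1)
	 * fsb of available space, but space that can never be rewritten
	 * is taken back out -- the LFS_SBPAD bytes under a superblock
	 * copy, and for a version > 1 filesystem whose segment 0 starts
	 * inside the label area, the unusable part of LFS_LABELPAD.
	 */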
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, sup->su_nsums * lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
		lfs_btofsb(fs, sup->su_ninos * lfs_sb_getibsize(fs)));
	lfs_sb_subdmeta(fs, sup->su_nsums * lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
		lfs_btofsb(fs, sup->su_ninos * lfs_sb_getibsize(fs)));
	if (lfs_sb_getdmeta(fs) < 0)
		lfs_sb_setdmeta(fs, 0);
	mutex_exit(&lfs_lock);
	sup->su_flags &= ~SEGUSE_DIRTY;
	LFS_WRITESEGENTRY(sup, fs, segnum, bp);

	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_shiftdirtytoclean(fs, cip, 1);
	lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip));
	mutex_enter(&lfs_lock);
	lfs_ci_setbfree(fs, cip, lfs_sb_getbfree(fs));
	lfs_ci_setavail(fs, cip, lfs_sb_getavail(fs)
			- fs->lfs_ravail - fs->lfs_favail);
	wakeup(&fs->lfs_availsleep);
	mutex_exit(&lfs_lock);
	(void) LFS_BWRITE_LOG(bp);

	if (lfs_dostats)
		++lfs_stats.segs_reclaimed;

	return (0);
}
/*
 * This will block until a segment in file system fsid is written.  A timeout
 * in milliseconds may be specified which will wake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 */
int
lfs_segwait(fsid_t *fsidp, struct timeval *tv)
{
	struct mount *mntp;
	void *addr;
	u_long timeout;
	int error;

	KERNEL_LOCK(1, NULL);
	if (fsidp == NULL || (mntp = vfs_getvfs(fsidp)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOULFS(mntp)->um_lfs->lfs_nextsegsleep;
	/*
	 * XXX THIS COULD SLEEP FOREVER IF TIMEOUT IS {0,0}!
	 * XXX IS THAT WHAT IS INTENDED?
	 */
	timeout = tvtohz(tv);
	error = tsleep(addr, PCATCH | PVFS, "segment", timeout);
	KERNEL_UNLOCK_ONE(NULL);
	return (error == ERESTART ? EINTR : 0);
}
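/*
 * An illustrative caller (assumes the lfs_segwait(2) stub): to wait at
 * most five seconds for the next segment write on one filesystem,
 *
 *	struct timeval tv = { 5, 0 };
 *	(void)lfs_segwait(&fsid, &tv);
 *
 * while a timeout of {0, 0} blocks until a segment is written.
 */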
/*
 * sys_lfs_segwait:
 *
 * System call wrapper around lfs_segwait().
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
sys___lfs_segwait50(struct lwp *l, const struct sys___lfs_segwait50_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(fsid_t *) fsidp;
		syscallarg(struct timeval *) tv;
	} */
	struct timeval atv;
	fsid_t fsid;
	int error;

	/* XXX need we be su to segwait? */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_SEGWAIT, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(SCARG(uap, fsidp), &fsid, sizeof(fsid_t))) != 0)
		return (error);

	if (SCARG(uap, tv)) {
		error = copyin(SCARG(uap, tv), &atv, sizeof(struct timeval));
		if (error)
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
	} else /* NULL or invalid */
		atv.tv_sec = atv.tv_usec = 0;

	return lfs_segwait(&fsid, &atv);
}
/*
 * VFS_VGET call specialized for the cleaner.  If the cleaner is
 * processing IINFO structures, it may have the ondisk inode already, so
 * don't go retrieving it again.
 *
 * Return the vnode referenced and locked.
 */
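/*
 * The hint handshake below works like this: the BLOCK_INFO the cleaner
 * passed in may already carry the on-disk inode image read from the
 * victim segment, so it is parked in ump->um_cleaner_hint around the
 * vcache_get() call, where the vnode-loading code (presumably
 * lfs_loadvnode()) can pick it up instead of reading the inode from
 * disk again.
 */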
static int
lfs_fastvget(struct mount *mp, ino_t ino, BLOCK_INFO *blkp, int lk_flags,
    struct vnode **vpp)
{
	struct ulfsmount *ump;
	int error;

	ump = VFSTOULFS(mp);
	ump->um_cleaner_hint = blkp;
	error = vcache_get(mp, &ino, sizeof(ino), vpp);
	ump->um_cleaner_hint = NULL;
	if (error)
		return error;
	error = vn_lock(*vpp, lk_flags);
	if (error) {
		if (error == EBUSY)
			error = EAGAIN;
		vrele(*vpp);
		*vpp = NULL;
		return error;
	}

	return 0;
}
/*
 * Make up a "fake" cleaner buffer, copy the data from userland into it.
 */
static struct buf *
lfs_fakebuf(struct lfs *fs, struct vnode *vp, daddr_t lbn, size_t size, void *uaddr)
{
	struct buf *bp;
	int error;

	KASSERT(VTOI(vp)->i_number != LFS_IFILE_INUM);

	bp = lfs_newbuf(VTOI(vp)->i_lfs, vp, lbn, size, LFS_NB_CLEAN);
	error = copyin(uaddr, bp->b_data, size);
	if (error) {
		lfs_freebuf(fs, bp);
		return NULL;
	}
	KDASSERT(bp->b_iodone == lfs_callback);

#if 0
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	mutex_exit(&lfs_lock);
#endif

	bp->b_bufsize = size;
	bp->b_bcount = size;
	return (bp);
}