1 /* $NetBSD: coalesce.c,v 1.17 2009/03/16 00:08:10 lukem Exp $ */
4 * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/param.h>
33 #include <sys/mount.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
40 #include <ufs/ufs/dinode.h>
41 #include <ufs/lfs/lfs.h>
59 #include "kernelops.h"
61 extern int debug
, do_mmap
;
75 enum coalesce_returncodes
{
80 COALESCE_BADBLOCKSIZE
,
91 const char *coalesce_return
[] = {
92 "Successfully coalesced",
93 "File not in use or inode not found",
94 "Not large enough to coalesce",
96 "Not enough blocks to account for size",
99 "Not broken enough to fix",
100 "Too many blocks not found",
101 "Too many blocks found in active segments",
107 static struct ufs1_dinode
*
108 get_dinode(struct clfs
*fs
, ino_t ino
)
113 struct ufs1_dinode
*dip
, *r
;
115 lfs_ientry(&ifp
, fs
, ino
, &bp
);
116 daddr
= ifp
->if_daddr
;
122 bread(fs
->clfs_devvp
, daddr
, fs
->lfs_ibsize
, NOCRED
, 0, &bp
);
123 for (dip
= (struct ufs1_dinode
*)bp
->b_data
;
124 dip
< (struct ufs1_dinode
*)(bp
->b_data
+ fs
->lfs_ibsize
); dip
++)
125 if (dip
->di_inumber
== ino
) {
126 r
= (struct ufs1_dinode
*)malloc(sizeof(*r
));
127 memcpy(r
, dip
, sizeof(*r
));
136 * Find out if this inode's data blocks are discontinuous; if they are,
137 * rewrite them using markv. Return a COALESCE_* status code for this inode.
140 clean_inode(struct clfs
*fs
, ino_t ino
)
142 BLOCK_INFO
*bip
= NULL
, *tbip
;
145 struct ufs1_dinode
*dip
;
146 struct clfs_seguse
*sup
;
147 struct lfs_fcntl_markv
/* {
157 dip
= get_dinode(fs
, ino
);
159 return COALESCE_NOINODE
;
161 /* Compute file block size, set up for bmapv */
162 onb
= nb
= lblkno(fs
, dip
->di_size
);
164 /* XXX for now, don't do any file small enough to have fragments */
167 return COALESCE_TOOSMALL
;
171 #if 0 /* di_size is uint64_t -- this is a noop */
172 if (dip
->di_size
< 0) {
173 dlog("ino %d, negative size (%" PRId64
")", ino
, dip
->di_size
);
175 return COALESCE_BADSIZE
;
178 if (nb
> dip
->di_blocks
) {
179 dlog("ino %d, computed blocks %d > held blocks %d", ino
, nb
,
182 return COALESCE_BADBLOCKSIZE
;
185 bip
= (BLOCK_INFO
*)malloc(sizeof(BLOCK_INFO
) * nb
);
187 syslog(LOG_WARNING
, "ino %llu, %d blocks: %m",
188 (unsigned long long)ino
, nb
);
190 return COALESCE_NOMEM
;
192 for (i
= 0; i
< nb
; i
++) {
193 memset(bip
+ i
, 0, sizeof(BLOCK_INFO
));
194 bip
[i
].bi_inode
= ino
;
196 bip
[i
].bi_version
= dip
->di_gen
;
197 /* Don't set the size, but let lfs_bmap fill it in */
201 if (kops
.ko_fcntl(fs
->clfs_ifilefd
, LFCNBMAPV
, &lim
) < 0) {
202 syslog(LOG_WARNING
, "%s: coalesce: LFCNBMAPV: %m",
204 retval
= COALESCE_BADBMAPV
;
208 for (i
= 0; i
< nb
; i
++) {
209 printf("bi_size = %d, bi_ino = %d, "
210 "bi_lbn = %d, bi_daddr = %d\n",
211 bip
[i
].bi_size
, bip
[i
].bi_inode
, bip
[i
].bi_lbn
,
216 for (i
= 1; i
< nb
; i
++) {
217 if (bip
[i
].bi_daddr
!= bip
[i
- 1].bi_daddr
+ fs
->lfs_frag
)
219 toff
+= abs(bip
[i
].bi_daddr
- bip
[i
- 1].bi_daddr
220 - fs
->lfs_frag
) >> fs
->lfs_fbshift
;
224 * If this file is not discontinuous, there's no point in rewriting it.
226 * Explicitly allow a certain amount of discontinuity, since large
227 * files will be broken among segments and medium-sized files
228 * can have a break or two and it's okay.
230 if (nb
<= 1 || noff
== 0 || noff
< log2int(nb
) ||
231 segtod(fs
, noff
) * 2 < nb
) {
232 retval
= COALESCE_NOTWORTHIT
;
235 syslog(LOG_DEBUG
, "ino %llu total discontinuity "
236 "%d (%lld) for %d blocks", (unsigned long long)ino
,
237 noff
, (long long)toff
, nb
);
239 /* Search for blocks in active segments; don't move them. */
240 for (i
= 0; i
< nb
; i
++) {
241 if (bip
[i
].bi_daddr
<= 0)
243 sup
= &fs
->clfs_segtab
[dtosn(fs
, bip
[i
].bi_daddr
)];
244 if (sup
->flags
& SEGUSE_ACTIVE
)
245 bip
[i
].bi_daddr
= LFS_UNUSED_DADDR
; /* 0 */
249 * Get rid of any blocks we've marked dead. If this is an older
250 * kernel that doesn't have bmapv fill in the block sizes, we'll
251 * toss everything here.
254 toss_old_blocks(fs
, &bip
, &nb
, NULL
);
258 * We may have tossed enough blocks that it is no longer worthwhile
259 * to rewrite this inode.
261 if (nb
== 0 || onb
- nb
> log2int(onb
)) {
263 syslog(LOG_DEBUG
, "too many blocks tossed, not rewriting");
264 retval
= COALESCE_NOTHINGLEFT
;
269 * We are going to rewrite this inode.
270 * For any remaining blocks, read in their contents.
272 for (i
= 0; i
< nb
; i
++) {
273 bip
[i
].bi_bp
= malloc(bip
[i
].bi_size
);
274 if (bip
[i
].bi_bp
== NULL
) {
275 syslog(LOG_WARNING
, "allocate block buffer size=%d: %m",
277 retval
= COALESCE_NOMEM
;
281 if (kops
.ko_pread(fs
->clfs_devfd
, bip
[i
].bi_bp
, bip
[i
].bi_size
,
282 fsbtob(fs
, bip
[i
].bi_daddr
)) < 0) {
283 retval
= COALESCE_EIO
;
288 syslog(LOG_DEBUG
, "ino %llu markv %d blocks",
289 (unsigned long long)ino
, nb
);
292 * Write in segment-sized chunks. If at any point we'd write more
293 * than half of the available segments, sleep until that's not
297 for (tbip
= bip
; tbip
< bip
+ nb
; tbip
+= bps
) {
299 bread(fs
->lfs_ivnode
, 0, fs
->lfs_bsize
, NOCRED
, 0, &bp
);
300 cip
= *(CLEANERINFO
*)bp
->b_data
;
303 if (cip
.clean
< 4) /* XXX magic number 4 */
304 kops
.ko_fcntl(fs
->clfs_ifilefd
,
306 } while(cip
.clean
< 4);
309 lim
.blkcnt
= (tbip
+ bps
< bip
+ nb
? bps
: nb
% bps
);
310 if (kops
.ko_fcntl(fs
->clfs_ifilefd
, LFCNMARKV
, &lim
) < 0) {
311 retval
= COALESCE_BADMARKV
;
316 retval
= COALESCE_OK
;
320 for (i
= 0; i
< onb
; i
++)
329 * Try coalescing every inode in the filesystem.
330 * Return the number of inodes actually altered.
332 int clean_all_inodes(struct clfs
*fs
)
335 int totals
[COALESCE_MAXERROR
];
338 memset(totals
, 0, sizeof(totals
));
340 fstat(fs
->clfs_ifilefd
, &st
);
341 maxino
= fs
->lfs_ifpb
* (st
.st_size
>> fs
->lfs_bshift
) -
342 fs
->lfs_segtabsz
- fs
->lfs_cleansz
;
344 for (i
= 0; i
< maxino
; i
++) {
345 r
= clean_inode(fs
, i
);
349 for (i
= 0; i
< COALESCE_MAXERROR
; i
++)
351 syslog(LOG_DEBUG
, "%s: %d", coalesce_return
[i
],
354 return totals
[COALESCE_OK
];
358 * Fork a child process to coalesce this fs.
361 fork_coalesce(struct clfs
*fs
)
363 static pid_t childpid
;
367 * If already running a coalescing child, don't start a new one.
370 if (waitpid(childpid
, NULL
, WNOHANG
) == childpid
)
373 if (childpid
&& kill(childpid
, 0) >= 0) {
374 /* already running a coalesce process */
376 syslog(LOG_DEBUG
, "coalescing already in progress");
381 * Fork a child and let the child coalesce
385 syslog(LOG_ERR
, "%s: fork to coaleasce: %m", fs
->lfs_fsmnt
);
387 } else if (childpid
== 0) {
388 syslog(LOG_NOTICE
, "%s: new coalescing process, pid %d",
389 fs
->lfs_fsmnt
, getpid());
390 num
= clean_all_inodes(fs
);
391 syslog(LOG_NOTICE
, "%s: coalesced %d discontiguous inodes",