Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / libexec / lfs_cleanerd / coalesce.c
blob45fc183fa8b381e3d46e05da71cee924fe7b262a
1 /* $NetBSD: coalesce.c,v 1.17 2009/03/16 00:08:10 lukem Exp $ */
3 /*-
4 * Copyright (c) 2002, 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant@hhhh.org>.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/param.h>
33 #include <sys/mount.h>
34 #include <sys/time.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <sys/mman.h>
40 #include <ufs/ufs/dinode.h>
41 #include <ufs/lfs/lfs.h>
43 #include <fcntl.h>
44 #include <signal.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <time.h>
49 #include <unistd.h>
50 #include <util.h>
51 #include <errno.h>
52 #include <err.h>
54 #include <syslog.h>
56 #include "bufcache.h"
57 #include "vnode.h"
58 #include "cleaner.h"
59 #include "kernelops.h"
61 extern int debug, do_mmap;
/*
 * Integer base-2 logarithm, rounded down: the position of the highest
 * set bit of n.  Yields -1 when n is zero or negative.
 */
int log2int(int n)
{
	int result = -1;

	for (; n > 0; n >>= 1)
		result++;

	return result;
}
/*
 * Result codes produced by clean_inode().  Each value is also an index
 * into coalesce_return[]; COALESCE_MAXERROR is the number of real codes.
 */
enum coalesce_returncodes {
	COALESCE_OK = 0,
	COALESCE_NOINODE,
	COALESCE_TOOSMALL,
	COALESCE_BADSIZE,
	COALESCE_BADBLOCKSIZE,
	COALESCE_NOMEM,
	COALESCE_BADBMAPV,
	COALESCE_BADMARKV,
	COALESCE_NOTWORTHIT,
	COALESCE_NOTHINGLEFT,
	COALESCE_EIO,

	COALESCE_MAXERROR
};

/*
 * Human-readable description of every coalesce_returncodes value.
 * The entries are keyed by the enum constants so that the table cannot
 * drift out of step with the enum when a code is added or reordered.
 */
const char *coalesce_return[] = {
	[COALESCE_OK]		= "Successfully coalesced",
	[COALESCE_NOINODE]	= "File not in use or inode not found",
	[COALESCE_TOOSMALL]	= "Not large enough to coalesce",
	[COALESCE_BADSIZE]	= "Negative size",
	[COALESCE_BADBLOCKSIZE]	= "Not enough blocks to account for size",
	[COALESCE_NOMEM]	= "Malloc failed",
	[COALESCE_BADBMAPV]	= "LFCNBMAPV failed",
	[COALESCE_BADMARKV]	= "LFCNMARKV failed",
	[COALESCE_NOTWORTHIT]	= "Not broken enough to fix",
	[COALESCE_NOTHINGLEFT]	= "Too many blocks found in active segments",
	[COALESCE_EIO]		= "I/O error",

	[COALESCE_MAXERROR]	= "No such error"
};
107 static struct ufs1_dinode *
108 get_dinode(struct clfs *fs, ino_t ino)
110 IFILE *ifp;
111 daddr_t daddr;
112 struct ubuf *bp;
113 struct ufs1_dinode *dip, *r;
115 lfs_ientry(&ifp, fs, ino, &bp);
116 daddr = ifp->if_daddr;
117 brelse(bp, 0);
119 if (daddr == 0x0)
120 return NULL;
122 bread(fs->clfs_devvp, daddr, fs->lfs_ibsize, NOCRED, 0, &bp);
123 for (dip = (struct ufs1_dinode *)bp->b_data;
124 dip < (struct ufs1_dinode *)(bp->b_data + fs->lfs_ibsize); dip++)
125 if (dip->di_inumber == ino) {
126 r = (struct ufs1_dinode *)malloc(sizeof(*r));
127 memcpy(r, dip, sizeof(*r));
128 brelse(bp, 0);
129 return r;
131 brelse(bp, 0);
132 return NULL;
136 * Find out if this inode's data blocks are discontinuous; if they are,
137 * rewrite them using markv. Return the number of inodes rewritten.
139 static int
140 clean_inode(struct clfs *fs, ino_t ino)
142 BLOCK_INFO *bip = NULL, *tbip;
143 CLEANERINFO cip;
144 struct ubuf *bp;
145 struct ufs1_dinode *dip;
146 struct clfs_seguse *sup;
147 struct lfs_fcntl_markv /* {
148 BLOCK_INFO *blkiov;
149 int blkcnt;
150 } */ lim;
151 daddr_t toff;
152 int i;
153 int nb, onb, noff;
154 int retval;
155 int bps;
157 dip = get_dinode(fs, ino);
158 if (dip == NULL)
159 return COALESCE_NOINODE;
161 /* Compute file block size, set up for bmapv */
162 onb = nb = lblkno(fs, dip->di_size);
164 /* XXX for now, don't do any file small enough to have fragments */
165 if (nb < NDADDR) {
166 free(dip);
167 return COALESCE_TOOSMALL;
170 /* Sanity checks */
171 #if 0 /* di_size is uint64_t -- this is a noop */
172 if (dip->di_size < 0) {
173 dlog("ino %d, negative size (%" PRId64 ")", ino, dip->di_size);
174 free(dip);
175 return COALESCE_BADSIZE;
177 #endif
178 if (nb > dip->di_blocks) {
179 dlog("ino %d, computed blocks %d > held blocks %d", ino, nb,
180 dip->di_blocks);
181 free(dip);
182 return COALESCE_BADBLOCKSIZE;
185 bip = (BLOCK_INFO *)malloc(sizeof(BLOCK_INFO) * nb);
186 if (bip == NULL) {
187 syslog(LOG_WARNING, "ino %llu, %d blocks: %m",
188 (unsigned long long)ino, nb);
189 free(dip);
190 return COALESCE_NOMEM;
192 for (i = 0; i < nb; i++) {
193 memset(bip + i, 0, sizeof(BLOCK_INFO));
194 bip[i].bi_inode = ino;
195 bip[i].bi_lbn = i;
196 bip[i].bi_version = dip->di_gen;
197 /* Don't set the size, but let lfs_bmap fill it in */
199 lim.blkiov = bip;
200 lim.blkcnt = nb;
201 if (kops.ko_fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim) < 0) {
202 syslog(LOG_WARNING, "%s: coalesce: LFCNBMAPV: %m",
203 fs->lfs_fsmnt);
204 retval = COALESCE_BADBMAPV;
205 goto out;
207 #if 0
208 for (i = 0; i < nb; i++) {
209 printf("bi_size = %d, bi_ino = %d, "
210 "bi_lbn = %d, bi_daddr = %d\n",
211 bip[i].bi_size, bip[i].bi_inode, bip[i].bi_lbn,
212 bip[i].bi_daddr);
214 #endif
215 noff = toff = 0;
216 for (i = 1; i < nb; i++) {
217 if (bip[i].bi_daddr != bip[i - 1].bi_daddr + fs->lfs_frag)
218 ++noff;
219 toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
220 - fs->lfs_frag) >> fs->lfs_fbshift;
224 * If this file is not discontinuous, there's no point in rewriting it.
226 * Explicitly allow a certain amount of discontinuity, since large
227 * files will be broken among segments and medium-sized files
228 * can have a break or two and it's okay.
230 if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
231 segtod(fs, noff) * 2 < nb) {
232 retval = COALESCE_NOTWORTHIT;
233 goto out;
234 } else if (debug)
235 syslog(LOG_DEBUG, "ino %llu total discontinuity "
236 "%d (%lld) for %d blocks", (unsigned long long)ino,
237 noff, (long long)toff, nb);
239 /* Search for blocks in active segments; don't move them. */
240 for (i = 0; i < nb; i++) {
241 if (bip[i].bi_daddr <= 0)
242 continue;
243 sup = &fs->clfs_segtab[dtosn(fs, bip[i].bi_daddr)];
244 if (sup->flags & SEGUSE_ACTIVE)
245 bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
249 * Get rid of any blocks we've marked dead. If this is an older
250 * kernel that doesn't have bmapv fill in the block sizes, we'll
251 * toss everything here.
253 onb = nb;
254 toss_old_blocks(fs, &bip, &nb, NULL);
255 nb = i;
258 * We may have tossed enough blocks that it is no longer worthwhile
259 * to rewrite this inode.
261 if (nb == 0 || onb - nb > log2int(onb)) {
262 if (debug)
263 syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
264 retval = COALESCE_NOTHINGLEFT;
265 goto out;
269 * We are going to rewrite this inode.
270 * For any remaining blocks, read in their contents.
272 for (i = 0; i < nb; i++) {
273 bip[i].bi_bp = malloc(bip[i].bi_size);
274 if (bip[i].bi_bp == NULL) {
275 syslog(LOG_WARNING, "allocate block buffer size=%d: %m",
276 bip[i].bi_size);
277 retval = COALESCE_NOMEM;
278 goto out;
281 if (kops.ko_pread(fs->clfs_devfd, bip[i].bi_bp, bip[i].bi_size,
282 fsbtob(fs, bip[i].bi_daddr)) < 0) {
283 retval = COALESCE_EIO;
284 goto out;
287 if (debug)
288 syslog(LOG_DEBUG, "ino %llu markv %d blocks",
289 (unsigned long long)ino, nb);
292 * Write in segment-sized chunks. If at any point we'd write more
293 * than half of the available segments, sleep until that's not
294 * true any more.
296 bps = segtod(fs, 1);
297 for (tbip = bip; tbip < bip + nb; tbip += bps) {
298 do {
299 bread(fs->lfs_ivnode, 0, fs->lfs_bsize, NOCRED, 0, &bp);
300 cip = *(CLEANERINFO *)bp->b_data;
301 brelse(bp, B_INVAL);
303 if (cip.clean < 4) /* XXX magic number 4 */
304 kops.ko_fcntl(fs->clfs_ifilefd,
305 LFCNSEGWAIT, NULL);
306 } while(cip.clean < 4);
308 lim.blkiov = tbip;
309 lim.blkcnt = (tbip + bps < bip + nb ? bps : nb % bps);
310 if (kops.ko_fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim) < 0) {
311 retval = COALESCE_BADMARKV;
312 goto out;
316 retval = COALESCE_OK;
317 out:
318 free(dip);
319 if (bip) {
320 for (i = 0; i < onb; i++)
321 if (bip[i].bi_bp)
322 free(bip[i].bi_bp);
323 free(bip);
325 return retval;
329 * Try coalescing every inode in the filesystem.
330 * Return the number of inodes actually altered.
332 int clean_all_inodes(struct clfs *fs)
334 int i, r, maxino;
335 int totals[COALESCE_MAXERROR];
336 struct stat st;
338 memset(totals, 0, sizeof(totals));
340 fstat(fs->clfs_ifilefd, &st);
341 maxino = fs->lfs_ifpb * (st.st_size >> fs->lfs_bshift) -
342 fs->lfs_segtabsz - fs->lfs_cleansz;
344 for (i = 0; i < maxino; i++) {
345 r = clean_inode(fs, i);
346 ++totals[r];
349 for (i = 0; i < COALESCE_MAXERROR; i++)
350 if (totals[i])
351 syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
352 totals[i]);
354 return totals[COALESCE_OK];
358 * Fork a child process to coalesce this fs.
361 fork_coalesce(struct clfs *fs)
363 static pid_t childpid;
364 int num;
367 * If already running a coalescing child, don't start a new one.
369 if (childpid) {
370 if (waitpid(childpid, NULL, WNOHANG) == childpid)
371 childpid = 0;
373 if (childpid && kill(childpid, 0) >= 0) {
374 /* already running a coalesce process */
375 if (debug)
376 syslog(LOG_DEBUG, "coalescing already in progress");
377 return 0;
381 * Fork a child and let the child coalease
383 childpid = fork();
384 if (childpid < 0) {
385 syslog(LOG_ERR, "%s: fork to coaleasce: %m", fs->lfs_fsmnt);
386 return 0;
387 } else if (childpid == 0) {
388 syslog(LOG_NOTICE, "%s: new coalescing process, pid %d",
389 fs->lfs_fsmnt, getpid());
390 num = clean_all_inodes(fs);
391 syslog(LOG_NOTICE, "%s: coalesced %d discontiguous inodes",
392 fs->lfs_fsmnt, num);
393 exit(0);
396 return 0;