1 /* $NetBSD: resize_ffs.c,v 1.11 2007/12/15 16:32:06 perry Exp $ */
2 /* From sources sent on February 17, 2003 */
4 * As its sole author, I explicitly place this code in the public
5 * domain. Anyone may use it for any purpose (though I would
6 * appreciate credit where it is due).
10 * mouse@rodents.montreal.qc.ca
11 * 7D C8 61 52 5D E7 2D 39 4E F1 31 3E E8 B3 27 4B
16 * Resize a filesystem. Is capable of both growing and shrinking.
18 * Usage: resize_ffs filesystem newsize
20 * Example: resize_ffs /dev/rsd1e 29574
22 * newsize is in DEV_BSIZE units (ie, disk sectors, usually 512 bytes
25 * Note: this currently requires gcc to build, since it is written
26 * depending on gcc-specific features, notably nested function
27 * definitions (which in at least a few cases depend on the lexical
28 * scoping gcc provides, so they can't be trivially moved outside).
30 * It will not do anything useful with filesystems in other than
31 * host-native byte order. This really should be fixed (it's largely
32 * a historical accident; the original version of this program is
33 * older than bi-endian support in FFS).
35 * Many thanks go to John Kohl <jtk@NetBSD.org> for finding bugs: the
36 * one responsible for the "realloccgblk: can't find blk in cyl"
37 * problem and a more minor one which left fs_dsize wrong when
38 * shrinking. (These actually indicate bugs in fsck too - it should
39 * have caught and fixed them.)
43 #include <sys/cdefs.h>
53 #include <sys/param.h> /* MAXFRAG */
54 #include <ufs/ffs/fs.h>
55 #include <ufs/ufs/dir.h>
56 #include <ufs/ufs/dinode.h>
57 #include <ufs/ufs/ufs_bswap.h> /* ufs_rw32 */
59 /* Suppress warnings about unused arguments */
60 #if defined(__GNUC__) && \
62 ( (__GNUC__ == 2) && \
63 defined(__GNUC_MINOR__) && \
64 (__GNUC_MINOR__ >= 7) ) )
65 #define UNUSED_ARG(x) x __unused
68 #define UNUSED_ARG(x) x
72 /* new size of filesystem, in sectors */
75 /* fd open onto disk device */
78 /* must we break up big I/O operations - see checksmallio() */
81 /* size of a cg, in bytes, rounded up to a frag boundary */
84 /* possible superblock localtions */
85 static int search
[] = SBLOCKSEARCH
;
86 /* location of the superblock */
90 static struct fs
*oldsb
; /* before we started */
91 static struct fs
*newsb
; /* copy to work with */
92 /* Buffer to hold the above. Make sure it's aligned correctly. */
93 static char sbbuf
[2 * SBLOCKSIZE
] __attribute__((__aligned__(__alignof__(struct fs
))));
95 /* a cg's worth of brand new squeaky-clean inodes */
96 static struct ufs1_dinode
*zinodes
;
98 /* pointers to the in-core cgs, read off disk and possibly modified */
99 static struct cg
**cgs
;
101 /* pointer to csum array - the stuff pointed to on-disk by fs_csaddr */
102 static struct csum
*csums
;
104 /* per-cg flags, indexed by cg number */
105 static unsigned char *cgflags
;
106 #define CGF_DIRTY 0x01 /* needs to be written to disk */
107 #define CGF_BLKMAPS 0x02 /* block bitmaps need rebuilding */
108 #define CGF_INOMAPS 0x04 /* inode bitmaps need rebuilding */
110 /* when shrinking, these two arrays record how we want blocks to move. */
111 /* if blkmove[i] is j, the frag that started out as frag #i should end */
112 /* up as frag #j. inomove[i]=j means, similarly, that the inode that */
113 /* started out as inode i should end up as inode j. */
114 static unsigned int *blkmove
;
115 static unsigned int *inomove
;
117 /* in-core copies of all inodes in the fs, indexed by inumber */
118 static struct ufs1_dinode
*inodes
;
120 /* per-inode flags, indexed by inumber */
121 static unsigned char *iflags
;
122 #define IF_DIRTY 0x01 /* needs to be written to disk */
123 #define IF_BDIRTY 0x02 /* like DIRTY, but is set on first inode in a
124 * block of inodes, and applies to the whole
127 /* Old FFS1 macros */
128 #define cg_blktot(cgp, ns) \
129 (cg_chkmagic(cgp, ns) ? \
130 ((int32_t *)((u_int8_t *)(cgp) + ufs_rw32((cgp)->cg_old_btotoff, (ns)))) \
131 : (((struct ocg *)(cgp))->cg_btot))
132 #define cg_blks(fs, cgp, cylno, ns) \
133 (cg_chkmagic(cgp, ns) ? \
134 ((int16_t *)((u_int8_t *)(cgp) + ufs_rw32((cgp)->cg_old_boff, (ns))) + \
135 (cylno) * (fs)->fs_old_nrpos) \
136 : (((struct ocg *)(cgp))->cg_b[cylno]))
137 #define cbtocylno(fs, bno) \
138 (fsbtodb(fs, bno) / (fs)->fs_old_spc)
139 #define cbtorpos(fs, bno) \
140 ((fs)->fs_old_nrpos <= 1 ? 0 : \
141 (fsbtodb(fs, bno) % (fs)->fs_old_spc / \
142 (fs)->fs_old_nsect * (fs)->fs_old_trackskew + \
143 fsbtodb(fs, bno) % (fs)->fs_old_spc % \
144 (fs)->fs_old_nsect * (fs)->fs_old_interleave) %\
145 (fs)->fs_old_nsect * (fs)->fs_old_nrpos / (fs)->fs_old_npsect)
146 #define dblksize(fs, dip, lbn) \
147 (((lbn) >= NDADDR || (dip)->di_size >= lblktosize(fs, (lbn) + 1)) \
149 : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
153 * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte
156 #define NSPB(fs) ((fs)->fs_old_nspf << (fs)->fs_fragshift)
157 #define NSPF(fs) ((fs)->fs_old_nspf)
160 * See if we need to break up large I/O operations. This should never
161 * be needed, but under at least one <version,platform> combination,
162 * large enough disk transfers to the raw device hang. So if we're
163 * talking to a character special device, play it safe; in this case,
164 * readat() and writeat() break everything up into pieces no larger
165 * than 8K, doing multiple syscalls for larger operations.
173 smallio
= ((stb
.st_mode
& S_IFMT
) == S_IFCHR
);
176 * Read size bytes starting at blkno into buf. blkno is in DEV_BSIZE
177 * units, ie, after fsbtodb(); size is in bytes.
180 readat(off_t blkno
, void *buf
, int size
)
182 /* Seek to the correct place. */
183 if (lseek(fd
, blkno
* DEV_BSIZE
, L_SET
) < 0)
184 err(1, "lseek failed");
186 /* See if we have to break up the transfer... */
188 char *bp
; /* pointer into buf */
189 int left
; /* bytes left to go */
190 int n
; /* number to do this time around */
191 int rv
; /* syscall return value */
195 n
= (left
> 8192) ? 8192 : left
;
196 rv
= read(fd
, bp
, n
);
198 err(1, "read failed");
200 errx(1, "read: wanted %d, got %d", n
, rv
);
206 rv
= read(fd
, buf
, size
);
208 err(1, "read failed");
210 errx(1, "read: wanted %d, got %d", size
, rv
);
214 * Write size bytes from buf starting at blkno. blkno is in DEV_BSIZE
215 * units, ie, after fsbtodb(); size is in bytes.
218 writeat(off_t blkno
, const void *buf
, int size
)
220 /* Seek to the correct place. */
221 if (lseek(fd
, blkno
* DEV_BSIZE
, L_SET
) < 0)
222 err(1, "lseek failed");
223 /* See if we have to break up the transfer... */
225 const char *bp
; /* pointer into buf */
226 int left
; /* bytes left to go */
227 int n
; /* number to do this time around */
228 int rv
; /* syscall return value */
232 n
= (left
> 8192) ? 8192 : left
;
233 rv
= write(fd
, bp
, n
);
235 err(1, "write failed");
237 errx(1, "write: wanted %d, got %d", n
, rv
);
243 rv
= write(fd
, buf
, size
);
245 err(1, "write failed");
247 errx(1, "write: wanted %d, got %d", size
, rv
);
251 * Never-fail versions of malloc() and realloc(), and an allocation
252 * routine (which also never fails) for allocating memory that will
253 * never be freed until exit.
260 nfmalloc(size_t nb
, const char *tag
)
267 err(1, "Can't allocate %lu bytes for %s",
268 (unsigned long int) nb
, tag
);
271 * Never-fail realloc.
274 nfrealloc(void *blk
, size_t nb
, const char *tag
)
278 rv
= realloc(blk
, nb
);
281 err(1, "Can't re-allocate %lu bytes for %s",
282 (unsigned long int) nb
, tag
);
285 * Allocate memory that will never be freed or reallocated. Arguably
286 * this routine should handle small allocations by chopping up pages,
287 * but that's not worth the bother; it's not called more than a
288 * handful of times per run, and if the allocations are that small the
289 * waste in giving each one its own page is ignorable.
292 alloconce(size_t nb
, const char *tag
)
296 rv
= mmap(0, nb
, PROT_READ
| PROT_WRITE
, MAP_ANON
| MAP_PRIVATE
, -1, 0);
297 if (rv
!= MAP_FAILED
)
299 err(1, "Can't map %lu bytes for %s",
300 (unsigned long int) nb
, tag
);
303 * Load the cgs and csums off disk. Also allocates the space to load
304 * them into and initializes the per-cg flags.
312 cgblksz
= roundup(oldsb
->fs_cgsize
, oldsb
->fs_fsize
);
313 cgs
= nfmalloc(oldsb
->fs_ncg
* sizeof(struct cg
*), "cg pointers");
314 cgp
= alloconce(oldsb
->fs_ncg
* cgblksz
, "cgs");
315 cgflags
= nfmalloc(oldsb
->fs_ncg
, "cg flags");
316 csums
= nfmalloc(oldsb
->fs_cssize
, "cg summary");
317 for (cg
= 0; cg
< oldsb
->fs_ncg
; cg
++) {
318 cgs
[cg
] = (struct cg
*) cgp
;
319 readat(fsbtodb(oldsb
, cgtod(oldsb
, cg
)), cgp
, cgblksz
);
323 readat(fsbtodb(oldsb
, oldsb
->fs_csaddr
), csums
, oldsb
->fs_cssize
);
326 * Set n bits, starting with bit #base, in the bitmap pointed to by
327 * bitvec (which is assumed to be large enough to include bits base
331 set_bits(unsigned char *bitvec
, unsigned int base
, unsigned int n
)
334 return; /* nothing to do */
335 if (base
& 7) { /* partial byte at beginning */
336 if (n
<= 8 - (base
& 7)) { /* entirely within one byte */
337 bitvec
[base
>> 3] |= (~((~0U) << n
)) << (base
& 7);
340 bitvec
[base
>> 3] |= (~0U) << (base
& 7);
342 base
= (base
& ~7) + 8;
344 if (n
>= 8) { /* do full bytes */
345 memset(bitvec
+ (base
>> 3), 0xff, n
>> 3);
349 if (n
) { /* partial byte at end */
350 bitvec
[base
>> 3] |= ~((~0U) << n
);
354 * Clear n bits, starting with bit #base, in the bitmap pointed to by
355 * bitvec (which is assumed to be large enough to include bits base
356 * through base+n-1). Code parallels set_bits().
359 clr_bits(unsigned char *bitvec
, int base
, int n
)
364 if (n
<= 8 - (base
& 7)) {
365 bitvec
[base
>> 3] &= ~((~((~0U) << n
)) << (base
& 7));
368 bitvec
[base
>> 3] &= ~((~0U) << (base
& 7));
370 base
= (base
& ~7) + 8;
373 bzero(bitvec
+ (base
>> 3), n
>> 3);
378 bitvec
[base
>> 3] &= (~0U) << n
;
382 * Test whether bit #bit is set in the bitmap pointed to by bitvec.
385 bit_is_set(unsigned char *bitvec
, int bit
)
387 return (bitvec
[bit
>> 3] & (1 << (bit
& 7)));
390 * Test whether bit #bit is clear in the bitmap pointed to by bitvec.
393 bit_is_clr(unsigned char *bitvec
, int bit
)
395 return (!bit_is_set(bitvec
, bit
));
398 * Test whether a whole block of bits is set in a bitmap. This is
399 * designed for testing (aligned) disk blocks in a bit-per-frag
400 * bitmap; it has assumptions wired into it based on that, essentially
401 * that the entire block fits into a single byte. This returns true
402 * iff _all_ the bits are set; it is not just the complement of
403 * blk_is_clr on the same arguments (unless blkfrags==1).
406 blk_is_set(unsigned char *bitvec
, int blkbase
, int blkfrags
)
410 mask
= (~((~0U) << blkfrags
)) << (blkbase
& 7);
411 return ((bitvec
[blkbase
>> 3] & mask
) == mask
);
414 * Test whether a whole block of bits is clear in a bitmap. See
415 * blk_is_set (above) for assumptions. This returns true iff _all_
416 * the bits are clear; it is not just the complement of blk_is_set on
417 * the same arguments (unless blkfrags==1).
420 blk_is_clr(unsigned char *bitvec
, int blkbase
, int blkfrags
)
424 mask
= (~((~0U) << blkfrags
)) << (blkbase
& 7);
425 return ((bitvec
[blkbase
>> 3] & mask
) == 0);
428 * Initialize a new cg. Called when growing. Assumes memory has been
429 * allocated but not otherwise set up. This code sets the fields of
430 * the cg, initializes the bitmaps (and cluster summaries, if
431 * applicable), updates both per-cylinder summary info and the global
432 * summary info in newsb; it also writes out new inodes for the cg.
434 * This code knows it can never be called for cg 0, which makes it a
435 * bit simpler than it would otherwise be.
440 struct cg
*cg
; /* The in-core cg, of course */
441 int base
; /* Disk address of cg base */
442 int dlow
; /* Size of pre-cg data area */
443 int dhigh
; /* Offset of post-inode data area, from base */
444 int dmax
; /* Offset of end of post-inode data area */
445 int i
; /* Generic loop index */
446 int n
; /* Generic count */
449 /* Place the data areas */
450 base
= cgbase(newsb
, cgn
);
451 dlow
= cgsblock(newsb
, cgn
) - base
;
452 dhigh
= cgdmin(newsb
, cgn
) - base
;
453 dmax
= newsb
->fs_size
- base
;
454 if (dmax
> newsb
->fs_fpg
)
455 dmax
= newsb
->fs_fpg
;
457 * Clear out the cg - assumes all-0-bytes is the correct way
458 * to initialize fields we don't otherwise touch, which is
459 * perhaps not the right thing to do, but it's what fsck and
462 bzero(cg
, newsb
->fs_cgsize
);
463 cg
->cg_time
= newsb
->fs_time
;
464 cg
->cg_magic
= CG_MAGIC
;
466 cg
->cg_old_ncyl
= newsb
->fs_old_cpg
;
467 /* fsck whines if the cg->cg_old_ncyl value in the last cg is fs_old_cpg
468 * instead of zero, when fs_old_cpg is the correct value. */
469 /* XXX fix once fsck is fixed */
470 if ((cgn
== newsb
->fs_ncg
- 1) /* && (newsb->fs_old_ncyl % newsb->fs_old_cpg) */ ) {
471 cg
->cg_old_ncyl
= newsb
->fs_old_ncyl
% newsb
->fs_old_cpg
;
473 cg
->cg_niblk
= newsb
->fs_ipg
;
475 /* Set up the bitmap pointers. We have to be careful to lay out the
476 * cg _exactly_ the way mkfs and fsck do it, since fsck compares the
477 * _entire_ cg against a recomputed cg, and whines if there is any
478 * mismatch, including the bitmap offsets. */
479 /* XXX update this comment when fsck is fixed */
480 cg
->cg_old_btotoff
= &cg
->cg_space
[0] - (unsigned char *) cg
;
481 cg
->cg_old_boff
= cg
->cg_old_btotoff
482 + (newsb
->fs_old_cpg
* sizeof(int32_t));
483 cg
->cg_iusedoff
= cg
->cg_old_boff
+
484 (newsb
->fs_old_cpg
* newsb
->fs_old_nrpos
* sizeof(int16_t));
485 cg
->cg_freeoff
= cg
->cg_iusedoff
+ howmany(newsb
->fs_ipg
, NBBY
);
486 if (newsb
->fs_contigsumsize
> 0) {
487 cg
->cg_nclusterblks
= cg
->cg_ndblk
/ newsb
->fs_frag
;
488 cg
->cg_clustersumoff
= cg
->cg_freeoff
+
489 howmany(newsb
->fs_old_cpg
* newsb
->fs_old_spc
/ NSPF(newsb
),
490 NBBY
) - sizeof(int32_t);
491 cg
->cg_clustersumoff
=
492 roundup(cg
->cg_clustersumoff
, sizeof(int32_t));
493 cg
->cg_clusteroff
= cg
->cg_clustersumoff
+
494 ((newsb
->fs_contigsumsize
+ 1) * sizeof(int32_t));
495 cg
->cg_nextfreeoff
= cg
->cg_clusteroff
+
496 howmany(newsb
->fs_old_cpg
* newsb
->fs_old_spc
/ NSPB(newsb
),
498 n
= dlow
/ newsb
->fs_frag
;
500 set_bits(cg_clustersfree(cg
, 0), 0, n
);
501 cg_clustersum(cg
, 0)[(n
> newsb
->fs_contigsumsize
) ?
502 newsb
->fs_contigsumsize
: n
]++;
505 cg
->cg_nextfreeoff
= cg
->cg_freeoff
+
506 howmany(newsb
->fs_old_cpg
* newsb
->fs_old_spc
/ NSPF(newsb
),
509 /* Mark the data areas as free; everything else is marked busy by the
510 * bzero up at the top. */
511 set_bits(cg_blksfree(cg
, 0), 0, dlow
);
512 set_bits(cg_blksfree(cg
, 0), dhigh
, dmax
- dhigh
);
513 /* Initialize summary info */
514 cg
->cg_cs
.cs_ndir
= 0;
515 cg
->cg_cs
.cs_nifree
= newsb
->fs_ipg
;
516 cg
->cg_cs
.cs_nbfree
= dlow
/ newsb
->fs_frag
;
517 cg
->cg_cs
.cs_nffree
= 0;
519 /* This is the simplest way of doing this; we perhaps could compute
520 * the correct cg_blktot()[] and cg_blks()[] values other ways, but it
521 * would be complicated and hardly seems worth the effort. (The
522 * reason there isn't frag-at-beginning and frag-at-end code here,
523 * like the code below for the post-inode data area, is that the
524 * pre-sb data area always starts at 0, and thus is block-aligned, and
525 * always ends at the sb, which is block-aligned.) */
526 for (i
= 0; i
< dlow
; i
+= newsb
->fs_frag
) {
527 cg_blktot(cg
, 0)[cbtocylno(newsb
, i
)]++;
528 cg_blks(newsb
, cg
, cbtocylno(newsb
, i
), 0)[cbtorpos(newsb
, i
)]++;
530 /* Deal with a partial block at the beginning of the post-inode area.
531 * I'm not convinced this can happen - I think the inodes are always
532 * block-aligned and always an integral number of blocks - but it's
533 * cheap to do the right thing just in case. */
534 if (dhigh
% newsb
->fs_frag
) {
535 n
= newsb
->fs_frag
- (dhigh
% newsb
->fs_frag
);
537 cg
->cg_cs
.cs_nffree
+= n
;
540 n
= (dmax
- dhigh
) / newsb
->fs_frag
;
541 /* We have n full-size blocks in the post-inode data area. */
543 cg
->cg_cs
.cs_nbfree
+= n
;
544 if (newsb
->fs_contigsumsize
> 0) {
545 i
= dhigh
/ newsb
->fs_frag
;
546 set_bits(cg_clustersfree(cg
, 0), i
, n
);
547 cg_clustersum(cg
, 0)[(n
> newsb
->fs_contigsumsize
) ?
548 newsb
->fs_contigsumsize
: n
]++;
550 for (i
= n
; i
> 0; i
--) {
551 cg_blktot(cg
, 0)[cbtocylno(newsb
, dhigh
)]++;
553 cbtocylno(newsb
, dhigh
), 0)[cbtorpos(newsb
,
555 dhigh
+= newsb
->fs_frag
;
558 /* Deal with any leftover frag at the end of the cg. */
562 cg
->cg_cs
.cs_nffree
+= i
;
564 /* Update the csum info. */
565 csums
[cgn
] = cg
->cg_cs
;
566 newsb
->fs_cstotal
.cs_nffree
+= cg
->cg_cs
.cs_nffree
;
567 newsb
->fs_cstotal
.cs_nbfree
+= cg
->cg_cs
.cs_nbfree
;
568 newsb
->fs_cstotal
.cs_nifree
+= cg
->cg_cs
.cs_nifree
;
569 /* Write out the cleared inodes. */
570 writeat(fsbtodb(newsb
, cgimin(newsb
, cgn
)), zinodes
,
571 newsb
->fs_ipg
* sizeof(struct ufs1_dinode
));
573 cgflags
[cgn
] |= CGF_DIRTY
;
576 * Find free space, at least nfrags consecutive frags of it. Pays no
577 * attention to block boundaries, but refuses to straddle cg
578 * boundaries, even if the disk blocks involved are in fact
579 * consecutive. Return value is the frag number of the first frag of
580 * the block, or -1 if no space was found. Uses newsb for sb values,
581 * and assumes the cgs[] structures correctly describe the area to be
584 * XXX is there a bug lurking in the ignoring of block boundaries by
585 * the routine used by fragmove() in evict_data()? Can an end-of-file
586 * frag legally straddle a block boundary? If not, this should be
587 * cloned and fixed to stop at block boundaries for that use. The
588 * current one may still be needed for csum info motion, in case that
589 * takes up more than a whole block (is the csum info allowed to begin
590 * partway through a block and continue into the following block?).
592 * If we wrap off the end of the filesystem back to the beginning, we
593 * can end up searching the end of the filesystem twice. I ignore
594 * this inefficiency, since if that happens we're going to croak with
595 * a no-space error anyway, so it happens at most once.
598 find_freespace(unsigned int nfrags
)
600 static int hand
= 0; /* hand rotates through all frags in the fs */
601 int cgsize
; /* size of the cg hand currently points into */
602 int cgn
; /* number of cg hand currently points into */
603 int fwc
; /* frag-within-cg number of frag hand points
605 int run
; /* length of run of free frags seen so far */
606 int secondpass
; /* have we wrapped from end of fs to
608 unsigned char *bits
; /* cg_blksfree()[] for cg hand points into */
610 cgn
= dtog(newsb
, hand
);
611 fwc
= dtogd(newsb
, hand
);
612 secondpass
= (hand
== 0);
614 bits
= cg_blksfree(cgs
[cgn
], 0);
615 cgsize
= cgs
[cgn
]->cg_ndblk
;
617 if (bit_is_set(bits
, fwc
)) {
620 return (hand
+ 1 - run
);
629 if (cgn
>= newsb
->fs_ncg
) {
636 bits
= cg_blksfree(cgs
[cgn
], 0);
637 cgsize
= cgs
[cgn
]->cg_ndblk
;
643 * Find a free block of disk space. Finds an entire block of frags,
644 * all of which are free. Return value is the frag number of the
645 * first frag of the block, or -1 if no space was found. Uses newsb
646 * for sb values, and assumes the cgs[] structures correctly describe
647 * the area to be searched.
649 * See find_freespace(), above, for remarks about hand wrapping around.
654 static int hand
= 0; /* hand rotates through all frags in fs */
655 int cgn
; /* cg number of cg hand points into */
656 int fwc
; /* frag-within-cg number of frag hand points
658 int cgsize
; /* size of cg hand points into */
659 int secondpass
; /* have we wrapped from end to beginning? */
660 unsigned char *bits
; /* cg_blksfree()[] for cg hand points into */
662 cgn
= dtog(newsb
, hand
);
663 fwc
= dtogd(newsb
, hand
);
664 secondpass
= (hand
== 0);
665 bits
= cg_blksfree(cgs
[cgn
], 0);
666 cgsize
= blknum(newsb
, cgs
[cgn
]->cg_ndblk
);
668 if (blk_is_set(bits
, fwc
, newsb
->fs_frag
))
670 fwc
+= newsb
->fs_frag
;
671 hand
+= newsb
->fs_frag
;
675 if (cgn
>= newsb
->fs_ncg
) {
682 bits
= cg_blksfree(cgs
[cgn
], 0);
683 cgsize
= blknum(newsb
, cgs
[cgn
]->cg_ndblk
);
688 * Find a free inode, returning its inumber or -1 if none was found.
689 * Uses newsb for sb values, and assumes the cgs[] structures
690 * correctly describe the area to be searched.
692 * See find_freespace(), above, for remarks about hand wrapping around.
697 static int hand
= 0; /* hand rotates through all inodes in fs */
698 int cgn
; /* cg number of cg hand points into */
699 int iwc
; /* inode-within-cg number of inode hand points
701 int secondpass
; /* have we wrapped from end to beginning? */
702 unsigned char *bits
; /* cg_inosused()[] for cg hand points into */
704 cgn
= hand
/ newsb
->fs_ipg
;
705 iwc
= hand
% newsb
->fs_ipg
;
706 secondpass
= (hand
== 0);
707 bits
= cg_inosused(cgs
[cgn
], 0);
709 if (bit_is_clr(bits
, iwc
))
713 if (iwc
>= newsb
->fs_ipg
) {
716 if (cgn
>= newsb
->fs_ncg
) {
723 bits
= cg_inosused(cgs
[cgn
], 0);
728 * Mark a frag as free. Sets the frag's bit in the cg_blksfree bitmap
729 * for the appropriate cg, and marks the cg as dirty.
736 cgn
= dtog(newsb
, fno
);
737 set_bits(cg_blksfree(cgs
[cgn
], 0), dtogd(newsb
, fno
), 1);
738 cgflags
[cgn
] |= CGF_DIRTY
| CGF_BLKMAPS
;
741 * Allocate a frag. Clears the frag's bit in the cg_blksfree bitmap
742 * for the appropriate cg, and marks the cg as dirty.
749 cgn
= dtog(newsb
, fno
);
750 clr_bits(cg_blksfree(cgs
[cgn
], 0), dtogd(newsb
, fno
), 1);
751 cgflags
[cgn
] |= CGF_DIRTY
| CGF_BLKMAPS
;
754 * Fix up the csum array. If shrinking, this involves freeing zero or
755 * more frags; if growing, it involves allocating them, or if the
756 * frags being grown into aren't free, finding space elsewhere for the
757 * csum info. (If the number of occupied frags doesn't change,
758 * nothing happens here.)
763 int nold
; /* # frags in old csum info */
764 int ntot
; /* # frags in new csum info */
765 int nnew
; /* ntot-nold */
766 int newloc
; /* new location for csum info, if necessary */
767 int i
; /* generic loop index */
768 int j
; /* generic loop index */
769 int f
; /* "from" frag number, if moving */
770 int t
; /* "to" frag number, if moving */
771 int cgn
; /* cg number, used when shrinking */
773 ntot
= howmany(newsb
->fs_cssize
, newsb
->fs_fsize
);
774 nold
= howmany(oldsb
->fs_cssize
, newsb
->fs_fsize
);
776 /* First, if there's no change in frag counts, it's easy. */
779 /* Next, if we're shrinking, it's almost as easy. Just free up any
780 * frags in the old area we no longer need. */
782 for ((i
= newsb
->fs_csaddr
+ ntot
- 1), (j
= nnew
);
789 /* We must be growing. Check to see that the new csum area fits
790 * within the filesystem. I think this can never happen, since for
791 * the csum area to grow, we must be adding at least one cg, so the
792 * old csum area can't be this close to the end of the new filesystem.
793 * But it's a cheap check. */
794 /* XXX what if csum info is at end of cg and grows into next cg, what
795 * if it spills over onto the next cg's backup superblock? Can this
797 if (newsb
->fs_csaddr
+ ntot
<= newsb
->fs_size
) {
798 /* Okay, it fits - now, see if the space we want is free. */
799 for ((i
= newsb
->fs_csaddr
+ nold
), (j
= nnew
);
802 cgn
= dtog(newsb
, i
);
803 if (bit_is_clr(cg_blksfree(cgs
[cgn
], 0),
808 /* Win win - all the frags we want are free. Allocate
809 * 'em and we're all done. */
810 for ((i
= newsb
->fs_csaddr
+ ntot
- nnew
), (j
= nnew
); j
> 0; i
++, j
--) {
816 /* We have to move the csum info, sigh. Look for new space, free old
817 * space, and allocate new. Update fs_csaddr. We don't copy anything
818 * on disk at this point; the csum info will be written to the
819 * then-current fs_csaddr as part of the final flush. */
820 newloc
= find_freespace(ntot
);
822 printf("Sorry, no space available for new csums\n");
825 for (i
= 0, f
= newsb
->fs_csaddr
, t
= newloc
; i
< ntot
; i
++, f
++, t
++) {
831 newsb
->fs_csaddr
= newloc
;
834 * Recompute newsb->fs_dsize. Just scans all cgs, adding the number of
835 * data blocks in that cg to the total.
838 recompute_fs_dsize(void)
843 for (i
= 0; i
< newsb
->fs_ncg
; i
++) {
844 int dlow
; /* size of before-sb data area */
845 int dhigh
; /* offset of post-inode data area */
846 int dmax
; /* total size of cg */
847 int base
; /* base of cg, since cgsblock() etc add it in */
848 base
= cgbase(newsb
, i
);
849 dlow
= cgsblock(newsb
, i
) - base
;
850 dhigh
= cgdmin(newsb
, i
) - base
;
851 dmax
= newsb
->fs_size
- base
;
852 if (dmax
> newsb
->fs_fpg
)
853 dmax
= newsb
->fs_fpg
;
854 newsb
->fs_dsize
+= dlow
+ dmax
- dhigh
;
856 /* Space in cg 0 before cgsblock is boot area, not free space! */
857 newsb
->fs_dsize
-= cgsblock(newsb
, 0) - cgbase(newsb
, 0);
858 /* And of course the csum info takes up space. */
859 newsb
->fs_dsize
-= howmany(newsb
->fs_cssize
, newsb
->fs_fsize
);
862 * Return the current time. We call this and assign, rather than
863 * calling time() directly, as insulation against OSes where fs_time
875 * Grow the filesystem.
882 /* Update the timestamp. */
883 newsb
->fs_time
= timestamp();
884 /* Allocate and clear the new-inode area, in case we add any cgs. */
885 zinodes
= alloconce(newsb
->fs_ipg
* sizeof(struct ufs1_dinode
),
887 bzero(zinodes
, newsb
->fs_ipg
* sizeof(struct ufs1_dinode
));
888 /* Update the size. */
889 newsb
->fs_size
= dbtofsb(newsb
, newsize
);
890 /* Did we actually not grow? (This can happen if newsize is less than
891 * a frag larger than the old size - unlikely, but no excuse to
892 * misbehave if it happens.) */
893 if (newsb
->fs_size
== oldsb
->fs_size
)
895 /* Check that the new last sector (frag, actually) is writable. Since
896 * it's at least one frag larger than it used to be, we know we aren't
897 * overwriting anything important by this. (The choice of sbbuf as
898 * what to write is irrelevant; it's just something handy that's known
899 * to be at least one frag in size.) */
900 writeat(newsb
->fs_size
- 1, &sbbuf
, newsb
->fs_fsize
);
901 /* Update fs_old_ncyl and fs_ncg. */
902 newsb
->fs_old_ncyl
= (newsb
->fs_size
* NSPF(newsb
)) / newsb
->fs_old_spc
;
903 newsb
->fs_ncg
= howmany(newsb
->fs_old_ncyl
, newsb
->fs_old_cpg
);
904 /* Does the last cg end before the end of its inode area? There is no
905 * reason why this couldn't be handled, but it would complicate a lot
906 * of code (in all filesystem code - fsck, kernel, etc) because of the
907 * potential partial inode area, and the gain in space would be
908 * minimal, at most the pre-sb data area. */
909 if (cgdmin(newsb
, newsb
->fs_ncg
- 1) > newsb
->fs_size
) {
911 newsb
->fs_old_ncyl
= newsb
->fs_ncg
* newsb
->fs_old_cpg
;
912 newsb
->fs_size
= (newsb
->fs_old_ncyl
* newsb
->fs_old_spc
) / NSPF(newsb
);
913 printf("Warning: last cylinder group is too small;\n");
914 printf(" dropping it. New size = %lu.\n",
915 (unsigned long int) fsbtodb(newsb
, newsb
->fs_size
));
917 /* Find out how big the csum area is, and realloc csums if bigger. */
918 newsb
->fs_cssize
= fragroundup(newsb
,
919 newsb
->fs_ncg
* sizeof(struct csum
));
920 if (newsb
->fs_cssize
> oldsb
->fs_cssize
)
921 csums
= nfrealloc(csums
, newsb
->fs_cssize
, "new cg summary");
922 /* If we're adding any cgs, realloc structures and set up the new cgs. */
923 if (newsb
->fs_ncg
> oldsb
->fs_ncg
) {
925 cgs
= nfrealloc(cgs
, newsb
->fs_ncg
* sizeof(struct cg
*),
927 cgflags
= nfrealloc(cgflags
, newsb
->fs_ncg
, "cg flags");
928 bzero(cgflags
+ oldsb
->fs_ncg
, newsb
->fs_ncg
- oldsb
->fs_ncg
);
929 cgp
= alloconce((newsb
->fs_ncg
- oldsb
->fs_ncg
) * cgblksz
,
931 for (i
= oldsb
->fs_ncg
; i
< newsb
->fs_ncg
; i
++) {
932 cgs
[i
] = (struct cg
*) cgp
;
936 cgs
[oldsb
->fs_ncg
- 1]->cg_old_ncyl
= oldsb
->fs_old_cpg
;
937 cgflags
[oldsb
->fs_ncg
- 1] |= CGF_DIRTY
;
939 /* If the old fs ended partway through a cg, we have to update the old
940 * last cg (though possibly not to a full cg!). */
941 if (oldsb
->fs_size
% oldsb
->fs_fpg
) {
946 cg
= cgs
[oldsb
->fs_ncg
- 1];
947 cgflags
[oldsb
->fs_ncg
- 1] |= CGF_DIRTY
| CGF_BLKMAPS
;
948 prevcgtop
= oldsb
->fs_fpg
* (oldsb
->fs_ncg
- 1);
949 newcgsize
= newsb
->fs_size
- prevcgtop
;
950 if (newcgsize
> newsb
->fs_fpg
)
951 newcgsize
= newsb
->fs_fpg
;
952 oldcgsize
= oldsb
->fs_size
% oldsb
->fs_fpg
;
953 set_bits(cg_blksfree(cg
, 0), oldcgsize
, newcgsize
- oldcgsize
);
954 cg
->cg_old_ncyl
= howmany(newcgsize
* NSPF(newsb
), newsb
->fs_old_spc
);
955 cg
->cg_ndblk
= newcgsize
;
957 /* Fix up the csum info, if necessary. */
959 /* Make fs_dsize match the new reality. */
960 recompute_fs_dsize();
963 * Call (*fn)() for each inode, passing the inode and its inumber. The
964 * number of cylinder groups is pased in, so this can be used to map
965 * over either the old or the new filesystem's set of inodes.
968 map_inodes(void (*fn
) (struct ufs1_dinode
* di
, unsigned int, void *arg
), int ncg
, void *cbarg
) {
972 ni
= oldsb
->fs_ipg
* ncg
;
973 for (i
= 0; i
< ni
; i
++)
974 (*fn
) (inodes
+ i
, i
, cbarg
);
976 /* Values for the third argument to the map function for
977 * map_inode_data_blocks. MDB_DATA indicates the block is contains
978 * file data; MDB_INDIR_PRE and MDB_INDIR_POST indicate that it's an
979 * indirect block. The MDB_INDIR_PRE call is made before the indirect
980 * block pointers are followed and the pointed-to blocks scanned,
981 * MDB_INDIR_POST after.
984 #define MDB_INDIR_PRE 2
985 #define MDB_INDIR_POST 3
987 typedef void (*mark_callback_t
) (unsigned int blocknum
, unsigned int nfrags
, unsigned int blksize
, int opcode
);
989 /* Helper function - handles a data block. Calls the callback
990 * function and returns number of bytes occupied in file (actually,
991 * rounded up to a frag boundary). The name is historical. */
993 markblk(mark_callback_t fn
, struct ufs1_dinode
* di
, int bn
, off_t o
)
997 if (o
>= di
->di_size
)
999 sz
= dblksize(newsb
, di
, lblkno(newsb
, o
));
1000 nb
= (sz
> di
->di_size
- o
) ? di
->di_size
- o
: sz
;
1002 (*fn
) (bn
, numfrags(newsb
, sz
), nb
, MDB_DATA
);
1005 /* Helper function - handles an indirect block. Makes the
1006 * MDB_INDIR_PRE callback for the indirect block, loops over the
1007 * pointers and recurses, and makes the MDB_INDIR_POST callback.
1008 * Returns the number of bytes occupied in file, as does markblk().
1009 * For the sake of update_for_data_move(), we read the indirect block
1010 * _after_ making the _PRE callback. The name is historical. */
1012 markiblk(mark_callback_t fn
, struct ufs1_dinode
* di
, int bn
, off_t o
, int lev
)
1017 static int32_t indirblk1
[howmany(MAXBSIZE
, sizeof(int32_t))];
1018 static int32_t indirblk2
[howmany(MAXBSIZE
, sizeof(int32_t))];
1019 static int32_t indirblk3
[howmany(MAXBSIZE
, sizeof(int32_t))];
1020 static int32_t *indirblks
[3] = {
1021 &indirblk1
[0], &indirblk2
[0], &indirblk3
[0]
1024 return (markblk(fn
, di
, bn
, o
));
1026 for (i
= newsb
->fs_bsize
;
1028 i
*= NINDIR(newsb
), lev
--);
1031 (*fn
) (bn
, newsb
->fs_frag
, newsb
->fs_bsize
, MDB_INDIR_PRE
);
1032 readat(fsbtodb(newsb
, bn
), indirblks
[lev
], newsb
->fs_bsize
);
1034 for (i
= 0; i
< NINDIR(newsb
); i
++) {
1035 j
= markiblk(fn
, di
, indirblks
[lev
][i
], o
, lev
- 1);
1041 (*fn
) (bn
, newsb
->fs_frag
, newsb
->fs_bsize
, MDB_INDIR_POST
);
1047 * Call (*fn)() for each data block for an inode. This routine assumes
1048 * the inode is known to be of a type that has data blocks (file,
1049 * directory, or non-fast symlink). The called function is:
1051 * (*fn)(unsigned int blkno, unsigned int nf, unsigned int nb, int op)
1053 * where blkno is the frag number, nf is the number of frags starting
1054 * at blkno (always <= fs_frag), nb is the number of bytes that belong
1055 * to the file (usually nf*fs_frag, often less for the last block/frag
1059 map_inode_data_blocks(struct ufs1_dinode
* di
, mark_callback_t fn
)
1061 off_t o
; /* offset within inode */
1062 int inc
; /* increment for o - maybe should be off_t? */
1063 int b
; /* index within di_db[] and di_ib[] arrays */
1065 /* Scan the direct blocks... */
1067 for (b
= 0; b
< NDADDR
; b
++) {
1068 inc
= markblk(fn
, di
, di
->di_db
[b
], o
);
1073 /* ...and the indirect blocks. */
1075 for (b
= 0; b
< NIADDR
; b
++) {
1076 inc
= markiblk(fn
, di
, di
->di_ib
[b
], o
, b
);
1085 dblk_callback(struct ufs1_dinode
* di
, unsigned int inum
, void *arg
)
1088 fn
= (mark_callback_t
) arg
;
1089 switch (di
->di_mode
& IFMT
) {
1091 if (di
->di_size
> newsb
->fs_maxsymlinklen
) {
1094 map_inode_data_blocks(di
, fn
);
1100 * Make a callback call, a la map_inode_data_blocks, for all data
1101 * blocks in the entire fs. This is used only once, in
1102 * update_for_data_move, but it's out at top level because the complex
1103 * downward-funarg nesting that would otherwise result seems to give
1104 * gcc gastric distress.
1107 map_data_blocks(mark_callback_t fn
, int ncg
)
1109 map_inodes(&dblk_callback
, ncg
, (void *) fn
);
1112 * Initialize the blkmove array.
1119 blkmove
= alloconce(oldsb
->fs_size
* sizeof(*blkmove
), "blkmove");
1120 for (i
= 0; i
< oldsb
->fs_size
; i
++)
1124 * Load the inodes off disk. Allocates the structures and initializes
1125 * them - the inodes from disk, the flags to zero.
1131 struct ufs1_dinode
*iptr
;
1133 inodes
= alloconce(oldsb
->fs_ncg
* oldsb
->fs_ipg
* sizeof(struct ufs1_dinode
), "inodes");
1134 iflags
= alloconce(oldsb
->fs_ncg
* oldsb
->fs_ipg
, "inode flags");
1135 bzero(iflags
, oldsb
->fs_ncg
* oldsb
->fs_ipg
);
1137 for (cg
= 0; cg
< oldsb
->fs_ncg
; cg
++) {
1138 readat(fsbtodb(oldsb
, cgimin(oldsb
, cg
)), iptr
,
1139 oldsb
->fs_ipg
* sizeof(struct ufs1_dinode
));
1140 iptr
+= oldsb
->fs_ipg
;
1144 * Report a filesystem-too-full problem.
1149 printf("Sorry, would run out of data blocks\n");
1153 * Record a desire to move "n" frags from "from" to "to".
1156 mark_move(unsigned int from
, unsigned int to
, unsigned int n
)
1159 blkmove
[from
++] = to
++;
1161 /* Helper function - evict n frags, starting with start (cg-relative).
1162 * The free bitmap is scanned, unallocated frags are ignored, and
1163 * each block of consecutive allocated frags is moved as a unit.
1166 fragmove(struct cg
* cg
, int base
, unsigned int start
, unsigned int n
)
1171 for (i
= 0; i
<= n
; i
++) {
1172 if ((i
< n
) && bit_is_clr(cg_blksfree(cg
, 0), start
+ i
)) {
1177 off
= find_freespace(run
);
1180 mark_move(base
+ start
+ i
- run
, off
, run
);
1181 set_bits(cg_blksfree(cg
, 0), start
+ i
- run
,
1183 clr_bits(cg_blksfree(cgs
[dtog(oldsb
, off
)], 0),
1184 dtogd(oldsb
, off
), run
);
1191 * Evict all data blocks from the given cg, starting at minfrag (based
1192 * at the beginning of the cg), for length nfrag. The eviction is
1193 * assumed to be entirely data-area; this should not be called with a
1194 * range overlapping the metadata structures in the cg. It also
1195 * assumes minfrag points into the given cg; it will misbehave if this
1198 * See the comment header on find_freespace() for one possible bug
1202 evict_data(struct cg
* cg
, unsigned int minfrag
, unsigned int nfrag
)
1204 int base
; /* base of cg (in frags from beginning of fs) */
1207 base
= cgbase(oldsb
, cg
->cg_cgx
);
1208 /* Does the boundary fall in the middle of a block? To avoid breaking
1209 * between frags allocated as consecutive, we always evict the whole
1210 * block in this case, though one could argue we should check to see
1211 * if the frag before or after the break is unallocated. */
1212 if (minfrag
% oldsb
->fs_frag
) {
1214 n
= minfrag
% oldsb
->fs_frag
;
1218 /* Do whole blocks. If a block is wholly free, skip it; if wholly
1219 * allocated, move it in toto. If neither, call fragmove() to move
1220 * the frags to new locations. */
1221 while (nfrag
>= oldsb
->fs_frag
) {
1222 if (!blk_is_set(cg_blksfree(cg
, 0), minfrag
, oldsb
->fs_frag
)) {
1223 if (blk_is_clr(cg_blksfree(cg
, 0), minfrag
,
1226 off
= find_freeblock();
1229 mark_move(base
+ minfrag
, off
, oldsb
->fs_frag
);
1230 set_bits(cg_blksfree(cg
, 0), minfrag
,
1232 clr_bits(cg_blksfree(cgs
[dtog(oldsb
, off
)], 0),
1233 dtogd(oldsb
, off
), oldsb
->fs_frag
);
1235 fragmove(cg
, base
, minfrag
, oldsb
->fs_frag
);
1238 minfrag
+= oldsb
->fs_frag
;
1239 nfrag
-= oldsb
->fs_frag
;
1241 /* Clean up any sub-block amount left over. */
1243 fragmove(cg
, base
, minfrag
, nfrag
);
1247 * Move all data blocks according to blkmove. We have to be careful,
1248 * because we may be updating indirect blocks that will themselves be
1249 * getting moved, or inode int32_t arrays that point to indirect
1250 * blocks that will be moved. We call this before
1251 * update_for_data_move, and update_for_data_move does inodes first,
1252 * then indirect blocks in preorder, so as to make sure that the
1253 * filesystem is self-consistent at all points, for better crash
1254 * tolerance. (We can get away with this only because all the writes
1255 * done by perform_data_move() are writing into space that's not used
1256 * by the old filesystem.) If we crash, some things may point to the
1257 * old data and some to the new, but both copies are the same. The
1258 * only wrong things should be csum info and free bitmaps, which fsck
1259 * is entirely capable of cleaning up.
1261 * Since blkmove_init() initializes all blocks to move to their current
1262 * locations, we can have two blocks marked as wanting to move to the
1263 * same location, but only two and only when one of them is the one
1264 * that was already there. So if blkmove[i]==i, we ignore that entry
1265 * entirely - for unallocated blocks, we don't want it (and may be
1266 * putting something else there), and for allocated blocks, we don't
1267 * want to copy it anywhere.
1270 perform_data_move(void)
1277 maxrun
= sizeof(buf
) / newsb
->fs_fsize
;
1279 for (i
= 0; i
< oldsb
->fs_size
; i
++) {
1280 if ((blkmove
[i
] == i
) ||
1283 (blkmove
[i
] != blkmove
[i
- 1] + 1))) {
1285 readat(fsbtodb(oldsb
, i
- run
), &buf
[0],
1286 run
<< oldsb
->fs_fshift
);
1287 writeat(fsbtodb(oldsb
, blkmove
[i
- run
]),
1288 &buf
[0], run
<< oldsb
->fs_fshift
);
1292 if (blkmove
[i
] != i
)
1296 readat(fsbtodb(oldsb
, i
- run
), &buf
[0],
1297 run
<< oldsb
->fs_fshift
);
1298 writeat(fsbtodb(oldsb
, blkmove
[i
- run
]), &buf
[0],
1299 run
<< oldsb
->fs_fshift
);
1303 * This modifies an array of int32_t, according to blkmove. This is
1304 * used to update inode block arrays and indirect blocks to point to
1305 * the new locations of data blocks.
1307 * Return value is the number of int32_ts that needed updating; in
1308 * particular, the return value is zero iff nothing was modified.
1311 movemap_blocks(int32_t * vec
, int n
)
1316 for (; n
> 0; n
--, vec
++) {
1317 if (blkmove
[*vec
] != *vec
) {
1318 *vec
= blkmove
[*vec
];
1325 moveblocks_callback(struct ufs1_dinode
* di
, unsigned int inum
, void *arg
)
1327 switch (di
->di_mode
& IFMT
) {
1329 if (di
->di_size
> oldsb
->fs_maxsymlinklen
) {
1332 /* don't || these two calls; we need their
1334 if (movemap_blocks(&di
->di_db
[0], NDADDR
)) {
1335 iflags
[inum
] |= IF_DIRTY
;
1337 if (movemap_blocks(&di
->di_ib
[0], NIADDR
)) {
1338 iflags
[inum
] |= IF_DIRTY
;
1346 moveindir_callback(unsigned int off
, unsigned int nfrag
, unsigned int nbytes
, int kind
)
1348 if (kind
== MDB_INDIR_PRE
) {
1349 int32_t blk
[howmany(MAXBSIZE
, sizeof(int32_t))];
1350 readat(fsbtodb(oldsb
, off
), &blk
[0], oldsb
->fs_bsize
);
1351 if (movemap_blocks(&blk
[0], NINDIR(oldsb
))) {
1352 writeat(fsbtodb(oldsb
, off
), &blk
[0], oldsb
->fs_bsize
);
1357 * Update all inode data arrays and indirect blocks to point to the new
1358 * locations of data blocks. See the comment header on
1359 * perform_data_move for some ordering considerations.
1362 update_for_data_move(void)
1364 map_inodes(&moveblocks_callback
, oldsb
->fs_ncg
, NULL
);
1365 map_data_blocks(&moveindir_callback
, oldsb
->fs_ncg
);
1368 * Initialize the inomove array.
1375 inomove
= alloconce(oldsb
->fs_ipg
* oldsb
->fs_ncg
* sizeof(*inomove
),
1377 for (i
= (oldsb
->fs_ipg
* oldsb
->fs_ncg
) - 1; i
>= 0; i
--)
1381 * Flush all dirtied inodes to disk. Scans the inode flags array; for
1382 * each dirty inode, it sets the BDIRTY bit on the first inode in the
1383 * block containing the dirty inode. Then it scans by blocks, and for
1384 * each marked block, writes it.
1393 ni
= newsb
->fs_ipg
* newsb
->fs_ncg
;
1394 m
= INOPB(newsb
) - 1;
1395 for (i
= 0; i
< ni
; i
++) {
1396 if (iflags
[i
] & IF_DIRTY
) {
1397 iflags
[i
& ~m
] |= IF_BDIRTY
;
1401 for (i
= 0; i
< ni
; i
+= m
) {
1402 if (iflags
[i
] & IF_BDIRTY
) {
1403 writeat(fsbtodb(newsb
, ino_to_fsba(newsb
, i
)),
1404 inodes
+ i
, newsb
->fs_bsize
);
1409 * Evict all inodes from the specified cg. shrink() already checked
1410 * that there were enough free inodes, so the no-free-inodes check is
1411 * a can't-happen. If it does trip, the filesystem should be in good
1412 * enough shape for fsck to fix; see the comment on perform_data_move
1413 * for the considerations in question.
1416 evict_inodes(struct cg
* cg
)
1422 inum
= newsb
->fs_ipg
* cg
->cg_cgx
;
1423 for (i
= 0; i
< newsb
->fs_ipg
; i
++, inum
++) {
1424 if (inodes
[inum
].di_mode
!= 0) {
1425 fi
= find_freeinode();
1427 printf("Sorry, inodes evaporated - "
1428 "filesystem probably needs fsck\n");
1432 clr_bits(cg_inosused(cg
, 0), i
, 1);
1433 set_bits(cg_inosused(cgs
[ino_to_cg(newsb
, fi
)], 0),
1434 fi
% newsb
->fs_ipg
, 1);
1439 * Move inodes from old locations to new. Does not actually write
1440 * anything to disk; just copies in-core and sets dirty bits.
1442 * We have to be careful here for reasons similar to those mentioned in
1443 * the comment header on perform_data_move, above: for the sake of
1444 * crash tolerance, we want to make sure everything is present at both
1445 * old and new locations before we update pointers. So we call this
1446 * first, then flush_inodes() to get them out on disk, then update
1447 * directories to match.
1450 perform_inode_move(void)
1455 ni
= oldsb
->fs_ipg
* oldsb
->fs_ncg
;
1456 for (i
= 0; i
< ni
; i
++) {
1457 if (inomove
[i
] != i
) {
1458 inodes
[inomove
[i
]] = inodes
[i
];
1459 iflags
[inomove
[i
]] = iflags
[i
] | IF_DIRTY
;
1464 * Update the directory contained in the nb bytes at buf, to point to
1465 * inodes' new locations.
1468 update_dirents(char *buf
, int nb
)
1471 #define d ((struct direct *)buf)
1475 if (inomove
[d
->d_ino
] != d
->d_ino
) {
1477 d
->d_ino
= inomove
[d
->d_ino
];
1486 * Callback function for map_inode_data_blocks, for updating a
1487 * directory to point to new inode locations.
1490 update_dir_data(unsigned int bn
, unsigned int size
, unsigned int nb
, int kind
)
1492 if (kind
== MDB_DATA
) {
1497 readat(fsbtodb(oldsb
, bn
), &buf
, size
<< oldsb
->fs_fshift
);
1498 if (update_dirents((char *) &buf
, nb
)) {
1499 writeat(fsbtodb(oldsb
, bn
), &buf
,
1500 size
<< oldsb
->fs_fshift
);
1505 dirmove_callback(struct ufs1_dinode
* di
, unsigned int inum
, void *arg
)
1507 switch (di
->di_mode
& IFMT
) {
1509 map_inode_data_blocks(di
, &update_dir_data
);
1514 * Update directory entries to point to new inode locations.
1517 update_for_inode_move(void)
1519 map_inodes(&dirmove_callback
, newsb
->fs_ncg
, NULL
);
1522 * Shrink the filesystem.
1529 /* Load the inodes off disk - we'll need 'em. */
1531 /* Update the timestamp. */
1532 newsb
->fs_time
= timestamp();
1533 /* Update the size figures. */
1534 newsb
->fs_size
= dbtofsb(newsb
, newsize
);
1535 newsb
->fs_old_ncyl
= (newsb
->fs_size
* NSPF(newsb
)) / newsb
->fs_old_spc
;
1536 newsb
->fs_ncg
= howmany(newsb
->fs_old_ncyl
, newsb
->fs_old_cpg
);
1537 /* Does the (new) last cg end before the end of its inode area? See
1538 * the similar code in grow() for more on this. */
1539 if (cgdmin(newsb
, newsb
->fs_ncg
- 1) > newsb
->fs_size
) {
1541 newsb
->fs_old_ncyl
= newsb
->fs_ncg
* newsb
->fs_old_cpg
;
1542 newsb
->fs_size
= (newsb
->fs_old_ncyl
* newsb
->fs_old_spc
) / NSPF(newsb
);
1543 printf("Warning: last cylinder group is too small;\n");
1544 printf(" dropping it. New size = %lu.\n",
1545 (unsigned long int) fsbtodb(newsb
, newsb
->fs_size
));
1547 /* Let's make sure we're not being shrunk into oblivion. */
1548 if (newsb
->fs_ncg
< 1) {
1549 printf("Size too small - filesystem would have no cylinders\n");
1552 /* Initialize for block motion. */
1554 /* Update csum size, then fix up for the new size */
1555 newsb
->fs_cssize
= fragroundup(newsb
,
1556 newsb
->fs_ncg
* sizeof(struct csum
));
1558 /* Evict data from any cgs being wholly eliminated */
1559 for (i
= newsb
->fs_ncg
; i
< oldsb
->fs_ncg
; i
++) {
1564 base
= cgbase(oldsb
, i
);
1565 dlow
= cgsblock(oldsb
, i
) - base
;
1566 dhigh
= cgdmin(oldsb
, i
) - base
;
1567 dmax
= oldsb
->fs_size
- base
;
1568 if (dmax
> cgs
[i
]->cg_ndblk
)
1569 dmax
= cgs
[i
]->cg_ndblk
;
1570 evict_data(cgs
[i
], 0, dlow
);
1571 evict_data(cgs
[i
], dhigh
, dmax
- dhigh
);
1572 newsb
->fs_cstotal
.cs_ndir
-= cgs
[i
]->cg_cs
.cs_ndir
;
1573 newsb
->fs_cstotal
.cs_nifree
-= cgs
[i
]->cg_cs
.cs_nifree
;
1574 newsb
->fs_cstotal
.cs_nffree
-= cgs
[i
]->cg_cs
.cs_nffree
;
1575 newsb
->fs_cstotal
.cs_nbfree
-= cgs
[i
]->cg_cs
.cs_nbfree
;
1577 /* Update the new last cg. */
1578 cgs
[newsb
->fs_ncg
- 1]->cg_ndblk
= newsb
->fs_size
-
1579 ((newsb
->fs_ncg
- 1) * newsb
->fs_fpg
);
1580 /* Is the new last cg partial? If so, evict any data from the part
1581 * being shrunken away. */
1582 if (newsb
->fs_size
% newsb
->fs_fpg
) {
1586 cg
= cgs
[newsb
->fs_ncg
- 1];
1587 newcgsize
= newsb
->fs_size
% newsb
->fs_fpg
;
1588 oldcgsize
= oldsb
->fs_size
- ((newsb
->fs_ncg
- 1) & oldsb
->fs_fpg
);
1589 if (oldcgsize
> oldsb
->fs_fpg
)
1590 oldcgsize
= oldsb
->fs_fpg
;
1591 evict_data(cg
, newcgsize
, oldcgsize
- newcgsize
);
1592 clr_bits(cg_blksfree(cg
, 0), newcgsize
, oldcgsize
- newcgsize
);
1594 /* Find out whether we would run out of inodes. (Note we haven't
1595 * actually done anything to the filesystem yet; all those evict_data
1596 * calls just update blkmove.) */
1600 for (i
= 0; i
< newsb
->fs_ncg
; i
++)
1601 slop
+= cgs
[i
]->cg_cs
.cs_nifree
;
1602 for (; i
< oldsb
->fs_ncg
; i
++)
1603 slop
-= oldsb
->fs_ipg
- cgs
[i
]->cg_cs
.cs_nifree
;
1605 printf("Sorry, would run out of inodes\n");
1609 /* Copy data, then update pointers to data. See the comment header on
1610 * perform_data_move for ordering considerations. */
1611 perform_data_move();
1612 update_for_data_move();
1613 /* Now do inodes. Initialize, evict, move, update - see the comment
1614 * header on perform_inode_move. */
1616 for (i
= newsb
->fs_ncg
; i
< oldsb
->fs_ncg
; i
++)
1617 evict_inodes(cgs
[i
]);
1618 perform_inode_move();
1620 update_for_inode_move();
1621 /* Recompute all the bitmaps; most of them probably need it anyway,
1622 * the rest are just paranoia and not wanting to have to bother
1623 * keeping track of exactly which ones require it. */
1624 for (i
= 0; i
< newsb
->fs_ncg
; i
++)
1625 cgflags
[i
] |= CGF_DIRTY
| CGF_BLKMAPS
| CGF_INOMAPS
;
1626 /* Update the cg_old_ncyl value for the last cylinder. The condition is
1627 * commented out because fsck whines if not - see the similar
1628 * condition in grow() for more. */
1629 /* XXX fix once fsck is fixed */
1630 /* if (newsb->fs_old_ncyl % newsb->fs_old_cpg) XXX */
1632 cgs
[newsb
->fs_ncg
- 1]->cg_old_ncyl
=
1633 newsb
->fs_old_ncyl
% newsb
->fs_old_cpg
;
1634 /* Make fs_dsize match the new reality. */
1635 recompute_fs_dsize();
1638 * Recompute the block totals, block cluster summaries, and rotational
1639 * position summaries, for a given cg (specified by number), based on
1640 * its free-frag bitmap (cg_blksfree()[]).
1643 rescan_blkmaps(int cgn
)
1654 /* Subtract off the current totals from the sb's summary info */
1655 newsb
->fs_cstotal
.cs_nffree
-= cg
->cg_cs
.cs_nffree
;
1656 newsb
->fs_cstotal
.cs_nbfree
-= cg
->cg_cs
.cs_nbfree
;
1657 /* Clear counters and bitmaps. */
1658 cg
->cg_cs
.cs_nffree
= 0;
1659 cg
->cg_cs
.cs_nbfree
= 0;
1660 bzero(&cg
->cg_frsum
[0], MAXFRAG
* sizeof(cg
->cg_frsum
[0]));
1661 bzero(&cg_blktot(cg
, 0)[0],
1662 newsb
->fs_old_cpg
* sizeof(cg_blktot(cg
, 0)[0]));
1663 bzero(&cg_blks(newsb
, cg
, 0, 0)[0],
1664 newsb
->fs_old_cpg
* newsb
->fs_old_nrpos
*
1665 sizeof(cg_blks(newsb
, cg
, 0, 0)[0]));
1666 if (newsb
->fs_contigsumsize
> 0) {
1667 cg
->cg_nclusterblks
= cg
->cg_ndblk
/ newsb
->fs_frag
;
1668 bzero(&cg_clustersum(cg
, 0)[1],
1669 newsb
->fs_contigsumsize
*
1670 sizeof(cg_clustersum(cg
, 0)[1]));
1671 bzero(&cg_clustersfree(cg
, 0)[0],
1672 howmany((newsb
->fs_old_cpg
* newsb
->fs_old_spc
) / NSPB(newsb
),
1675 /* Scan the free-frag bitmap. Runs of free frags are kept track of
1676 * with fragrun, and recorded into cg_frsum[] and cg_cs.cs_nffree; on
1677 * each block boundary, entire free blocks are recorded as well. */
1684 while (f
< cg
->cg_ndblk
) {
1685 if (bit_is_set(cg_blksfree(cg
, 0), f
)) {
1690 cg
->cg_frsum
[fragrun
]++;
1691 cg
->cg_cs
.cs_nffree
+= fragrun
;
1697 if (fwb
>= newsb
->fs_frag
) {
1699 cg
->cg_cs
.cs_nbfree
++;
1700 if (newsb
->fs_contigsumsize
> 0)
1701 set_bits(cg_clustersfree(cg
, 0), b
, 1);
1702 cg_blktot(cg
, 0)[cbtocylno(newsb
, f
- newsb
->fs_frag
)]++;
1704 cbtocylno(newsb
, f
- newsb
->fs_frag
),
1705 0)[cbtorpos(newsb
, f
- newsb
->fs_frag
)]++;
1709 cg
->cg_frsum
[fragrun
]++;
1710 cg
->cg_cs
.cs_nffree
+= fragrun
;
1712 if (newsb
->fs_contigsumsize
> 0) {
1714 cg_clustersum(cg
, 0)[(blkrun
> newsb
->fs_contigsumsize
) ? newsb
->fs_contigsumsize
: blkrun
]++;
1726 cg
->cg_frsum
[fragrun
]++;
1727 cg
->cg_cs
.cs_nffree
+= fragrun
;
1729 if ((blkrun
> 0) && (newsb
->fs_contigsumsize
> 0)) {
1730 cg_clustersum(cg
, 0)[(blkrun
> newsb
->fs_contigsumsize
) ?
1731 newsb
->fs_contigsumsize
: blkrun
]++;
1734 * Put the updated summary info back into csums, and add it
1735 * back into the sb's summary info. Then mark the cg dirty.
1737 csums
[cgn
] = cg
->cg_cs
;
1738 newsb
->fs_cstotal
.cs_nffree
+= cg
->cg_cs
.cs_nffree
;
1739 newsb
->fs_cstotal
.cs_nbfree
+= cg
->cg_cs
.cs_nbfree
;
1740 cgflags
[cgn
] |= CGF_DIRTY
;
1743 * Recompute the cg_inosused()[] bitmap, and the cs_nifree and cs_ndir
1744 * values, for a cg, based on the in-core inodes for that cg.
1747 rescan_inomaps(int cgn
)
1754 newsb
->fs_cstotal
.cs_ndir
-= cg
->cg_cs
.cs_ndir
;
1755 newsb
->fs_cstotal
.cs_nifree
-= cg
->cg_cs
.cs_nifree
;
1756 cg
->cg_cs
.cs_ndir
= 0;
1757 cg
->cg_cs
.cs_nifree
= 0;
1758 bzero(&cg_inosused(cg
, 0)[0], howmany(newsb
->fs_ipg
, NBBY
));
1759 inum
= cgn
* newsb
->fs_ipg
;
1761 set_bits(cg_inosused(cg
, 0), 0, 2);
1767 for (; iwc
< newsb
->fs_ipg
; iwc
++, inum
++) {
1768 switch (inodes
[inum
].di_mode
& IFMT
) {
1770 cg
->cg_cs
.cs_nifree
++;
1773 cg
->cg_cs
.cs_ndir
++;
1776 set_bits(cg_inosused(cg
, 0), iwc
, 1);
1780 csums
[cgn
] = cg
->cg_cs
;
1781 newsb
->fs_cstotal
.cs_ndir
+= cg
->cg_cs
.cs_ndir
;
1782 newsb
->fs_cstotal
.cs_nifree
+= cg
->cg_cs
.cs_nifree
;
1783 cgflags
[cgn
] |= CGF_DIRTY
;
1786 * Flush cgs to disk, recomputing anything they're marked as needing.
1793 for (i
= 0; i
< newsb
->fs_ncg
; i
++) {
1794 if (cgflags
[i
] & CGF_BLKMAPS
) {
1797 if (cgflags
[i
] & CGF_INOMAPS
) {
1800 if (cgflags
[i
] & CGF_DIRTY
) {
1801 cgs
[i
]->cg_rotor
= 0;
1802 cgs
[i
]->cg_frotor
= 0;
1803 cgs
[i
]->cg_irotor
= 0;
1804 writeat(fsbtodb(newsb
, cgtod(newsb
, i
)), cgs
[i
],
1808 writeat(fsbtodb(newsb
, newsb
->fs_csaddr
), csums
, newsb
->fs_cssize
);
1811 * Write the superblock, both to the main superblock and to each cg's
1812 * alternative superblock.
1819 writeat(where
/ DEV_BSIZE
, newsb
, SBLOCKSIZE
);
1820 for (i
= 0; i
< newsb
->fs_ncg
; i
++) {
1821 writeat(fsbtodb(newsb
, cgsblock(newsb
, i
)), newsb
, SBLOCKSIZE
);
1827 int main(int, char **);
1829 main(int ac
, char **av
)
1833 fprintf(stderr
, "usage: %s filesystem new-size\n",
1837 fd
= open(av
[1], O_RDWR
, 0);
1839 err(1, "Cannot open `%s'", av
[1]);
1841 newsize
= atoi(av
[2]);
1842 oldsb
= (struct fs
*) & sbbuf
;
1843 newsb
= (struct fs
*) (SBLOCKSIZE
+ (char *) &sbbuf
);
1844 for (where
= search
[i
= 0]; search
[i
] != -1; where
= search
[++i
]) {
1845 readat(where
/ DEV_BSIZE
, oldsb
, SBLOCKSIZE
);
1846 if (oldsb
->fs_magic
== FS_UFS1_MAGIC
)
1848 if (where
== SBLOCK_UFS2
)
1850 if (oldsb
->fs_old_flags
& FS_FLAGS_UPDATED
)
1851 err(1, "Cannot resize ffsv2 format suberblock!");
1853 if (where
== (off_t
)-1)
1854 errx(1, "Bad magic number");
1855 oldsb
->fs_qbmask
= ~(int64_t) oldsb
->fs_bmask
;
1856 oldsb
->fs_qfmask
= ~(int64_t) oldsb
->fs_fmask
;
1857 if (oldsb
->fs_ipg
% INOPB(oldsb
)) {
1858 printf("ipg[%d] %% INOPB[%d] != 0\n", (int) oldsb
->fs_ipg
,
1859 (int) INOPB(oldsb
));
1862 /* The superblock is bigger than struct fs (there are trailing tables,
1863 * of non-fixed size); make sure we copy the whole thing. SBLOCKSIZE may
1864 * be an over-estimate, but we do this just once, so being generous is
1866 bcopy(oldsb
, newsb
, SBLOCKSIZE
);
1868 if (newsize
> fsbtodb(oldsb
, oldsb
->fs_size
)) {
1870 } else if (newsize
< fsbtodb(oldsb
, oldsb
->fs_size
)) {