/* Source: sys/ufs/ffs/ffs_balloc.c (NetBSD, imported into minix.git). */
/*	$NetBSD: ffs_balloc.c,v 1.54 2011/04/23 07:36:02 hannken Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.54 2011/04/23 07:36:02 hannken Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <uvm/uvm.h>
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
85 int error;
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
95 return error;
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 #ifdef FFS_EI
115 const int needswap = UFS_FSNEEDSWAP(fs);
116 #endif
117 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
119 lbn = lblkno(fs, off);
120 size = blkoff(fs, off) + size;
121 if (size > fs->fs_bsize)
122 panic("ffs_balloc: blk too big");
123 if (bpp != NULL) {
124 *bpp = NULL;
126 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
128 if (lbn < 0)
129 return (EFBIG);
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
137 lastlbn = lblkno(fs, ip->i_size);
138 if (lastlbn < NDADDR && lastlbn < lbn) {
139 nb = lastlbn;
140 osize = blksize(fs, ip, nb);
141 if (osize < fs->fs_bsize && osize > 0) {
142 mutex_enter(&ump->um_lock);
143 error = ffs_realloccg(ip, nb,
144 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
145 &ip->i_ffs1_db[0]),
146 osize, (int)fs->fs_bsize, cred, bpp, &newb);
147 if (error)
148 return (error);
149 ip->i_size = lblktosize(fs, nb + 1);
150 ip->i_ffs1_size = ip->i_size;
151 uvm_vnp_setsize(vp, ip->i_ffs1_size);
152 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
153 ip->i_flag |= IN_CHANGE | IN_UPDATE;
154 if (bpp && *bpp) {
155 if (flags & B_SYNC)
156 bwrite(*bpp);
157 else
158 bawrite(*bpp);
164 * The first NDADDR blocks are direct blocks
167 if (lbn < NDADDR) {
168 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
169 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
172 * The block is an already-allocated direct block
173 * and the file already extends past this block,
174 * thus this must be a whole block.
175 * Just read the block (if requested).
178 if (bpp != NULL) {
179 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
180 B_MODIFY, bpp);
181 if (error) {
182 brelse(*bpp, 0);
183 return (error);
186 return (0);
188 if (nb != 0) {
191 * Consider need to reallocate a fragment.
194 osize = fragroundup(fs, blkoff(fs, ip->i_size));
195 nsize = fragroundup(fs, size);
196 if (nsize <= osize) {
199 * The existing block is already
200 * at least as big as we want.
201 * Just read the block (if requested).
204 if (bpp != NULL) {
205 error = bread(vp, lbn, osize, NOCRED,
206 B_MODIFY, bpp);
207 if (error) {
208 brelse(*bpp, 0);
209 return (error);
212 return 0;
213 } else {
216 * The existing block is smaller than we want,
217 * grow it.
219 mutex_enter(&ump->um_lock);
220 error = ffs_realloccg(ip, lbn,
221 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
222 &ip->i_ffs1_db[0]),
223 osize, nsize, cred, bpp, &newb);
224 if (error)
225 return (error);
227 } else {
230 * the block was not previously allocated,
231 * allocate a new block or fragment.
234 if (ip->i_size < lblktosize(fs, lbn + 1))
235 nsize = fragroundup(fs, size);
236 else
237 nsize = fs->fs_bsize;
238 mutex_enter(&ump->um_lock);
239 error = ffs_alloc(ip, lbn,
240 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
241 &ip->i_ffs1_db[0]),
242 nsize, flags, cred, &newb);
243 if (error)
244 return (error);
245 if (bpp != NULL) {
246 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
247 nsize, (flags & B_CLRBUF) != 0, bpp);
248 if (error)
249 return error;
252 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
253 ip->i_flag |= IN_CHANGE | IN_UPDATE;
254 return (0);
258 * Determine the number of levels of indirection.
261 pref = 0;
262 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
263 return (error);
266 * Fetch the first indirect block allocating if necessary.
269 --num;
270 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
271 allocib = NULL;
272 allocblk = allociblk;
273 if (nb == 0) {
274 mutex_enter(&ump->um_lock);
275 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
276 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
277 flags | B_METAONLY, cred, &newb);
278 if (error)
279 goto fail;
280 nb = newb;
281 *allocblk++ = nb;
282 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
283 fs->fs_bsize, true, &bp);
284 if (error)
285 goto fail;
287 * Write synchronously so that indirect blocks
288 * never point at garbage.
290 if ((error = bwrite(bp)) != 0)
291 goto fail;
292 unwindidx = 0;
293 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
294 *allocib = ufs_rw32(nb, needswap);
295 ip->i_flag |= IN_CHANGE | IN_UPDATE;
299 * Fetch through the indirect blocks, allocating as necessary.
302 for (i = 1;;) {
303 error = bread(vp,
304 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
305 if (error) {
306 brelse(bp, 0);
307 goto fail;
309 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
310 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
311 if (i == num)
312 break;
313 i++;
314 if (nb != 0) {
315 brelse(bp, 0);
316 continue;
318 if (fscow_run(bp, true) != 0) {
319 brelse(bp, 0);
320 goto fail;
322 mutex_enter(&ump->um_lock);
323 /* Try to keep snapshot indirect blocks contiguous. */
324 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
325 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
326 flags | B_METAONLY, &bap[0]);
327 if (pref == 0)
328 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
329 NULL);
330 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
331 flags | B_METAONLY, cred, &newb);
332 if (error) {
333 brelse(bp, 0);
334 goto fail;
336 nb = newb;
337 *allocblk++ = nb;
338 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
339 fs->fs_bsize, true, &nbp);
340 if (error) {
341 brelse(bp, 0);
342 goto fail;
345 * Write synchronously so that indirect blocks
346 * never point at garbage.
348 if ((error = bwrite(nbp)) != 0) {
349 brelse(bp, 0);
350 goto fail;
352 if (unwindidx < 0)
353 unwindidx = i - 1;
354 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
357 * If required, write synchronously, otherwise use
358 * delayed write.
361 if (flags & B_SYNC) {
362 bwrite(bp);
363 } else {
364 bdwrite(bp);
368 if (flags & B_METAONLY) {
369 KASSERT(bpp != NULL);
370 *bpp = bp;
371 return (0);
375 * Get the data block, allocating if necessary.
378 if (nb == 0) {
379 if (fscow_run(bp, true) != 0) {
380 brelse(bp, 0);
381 goto fail;
383 mutex_enter(&ump->um_lock);
384 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
385 &bap[0]);
386 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
387 &newb);
388 if (error) {
389 brelse(bp, 0);
390 goto fail;
392 nb = newb;
393 *allocblk++ = nb;
394 if (bpp != NULL) {
395 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
396 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
397 if (error) {
398 brelse(bp, 0);
399 goto fail;
402 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
403 if (allocib == NULL && unwindidx < 0) {
404 unwindidx = i - 1;
408 * If required, write synchronously, otherwise use
409 * delayed write.
412 if (flags & B_SYNC) {
413 bwrite(bp);
414 } else {
415 bdwrite(bp);
417 return (0);
419 brelse(bp, 0);
420 if (bpp != NULL) {
421 if (flags & B_CLRBUF) {
422 error = bread(vp, lbn, (int)fs->fs_bsize,
423 NOCRED, B_MODIFY, &nbp);
424 if (error) {
425 brelse(nbp, 0);
426 goto fail;
428 } else {
429 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
430 fs->fs_bsize, true, &nbp);
431 if (error)
432 goto fail;
434 *bpp = nbp;
436 return (0);
438 fail:
440 * If we have failed part way through block allocation, we
441 * have to deallocate any indirect blocks that we have allocated.
444 if (unwindidx >= 0) {
447 * First write out any buffers we've created to resolve their
448 * softdeps. This must be done in reverse order of creation
449 * so that we resolve the dependencies in one pass.
450 * Write the cylinder group buffers for these buffers too.
453 for (i = num; i >= unwindidx; i--) {
454 if (i == 0) {
455 break;
457 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
458 fs->fs_bsize, false, &bp) != 0)
459 continue;
460 if (bp->b_oflags & BO_DELWRI) {
461 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
462 dbtofsb(fs, bp->b_blkno))));
463 bwrite(bp);
464 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
465 fs->fs_cgsize, false, &bp) != 0)
466 continue;
467 if (bp->b_oflags & BO_DELWRI) {
468 bwrite(bp);
469 } else {
470 brelse(bp, BC_INVAL);
472 } else {
473 brelse(bp, BC_INVAL);
478 * Undo the partial allocation.
480 if (unwindidx == 0) {
481 *allocib = 0;
482 ip->i_flag |= IN_CHANGE | IN_UPDATE;
483 } else {
484 int r;
486 r = bread(vp, indirs[unwindidx].in_lbn,
487 (int)fs->fs_bsize, NOCRED, 0, &bp);
488 if (r) {
489 panic("Could not unwind indirect block, error %d", r);
490 brelse(bp, 0);
491 } else {
492 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
493 bap[indirs[unwindidx].in_off] = 0;
494 bwrite(bp);
497 for (i = unwindidx + 1; i <= num; i++) {
498 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
499 fs->fs_bsize, false, &bp) == 0)
500 brelse(bp, BC_INVAL);
503 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
504 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
505 deallocated += fs->fs_bsize;
507 if (deallocated) {
508 #if defined(QUOTA) || defined(QUOTA2)
510 * Restore user's disk quota because allocation failed.
512 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
513 #endif
514 ip->i_ffs1_blocks -= btodb(deallocated);
515 ip->i_flag |= IN_CHANGE | IN_UPDATE;
517 return (error);
520 static int
521 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
522 int flags, struct buf **bpp)
524 daddr_t lbn, lastlbn;
525 struct buf *bp, *nbp;
526 struct inode *ip = VTOI(vp);
527 struct fs *fs = ip->i_fs;
528 struct ufsmount *ump = ip->i_ump;
529 struct indir indirs[NIADDR + 2];
530 daddr_t newb, pref, nb;
531 int64_t *bap;
532 int deallocated, osize, nsize, num, i, error;
533 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
534 int64_t *allocib;
535 int unwindidx = -1;
536 #ifdef FFS_EI
537 const int needswap = UFS_FSNEEDSWAP(fs);
538 #endif
539 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
541 lbn = lblkno(fs, off);
542 size = blkoff(fs, off) + size;
543 if (size > fs->fs_bsize)
544 panic("ffs_balloc: blk too big");
545 if (bpp != NULL) {
546 *bpp = NULL;
548 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
550 if (lbn < 0)
551 return (EFBIG);
553 #ifdef notyet
555 * Check for allocating external data.
557 if (flags & IO_EXT) {
558 if (lbn >= NXADDR)
559 return (EFBIG);
561 * If the next write will extend the data into a new block,
562 * and the data is currently composed of a fragment
563 * this fragment has to be extended to be a full block.
565 lastlbn = lblkno(fs, dp->di_extsize);
566 if (lastlbn < lbn) {
567 nb = lastlbn;
568 osize = sblksize(fs, dp->di_extsize, nb);
569 if (osize < fs->fs_bsize && osize > 0) {
570 mutex_enter(&ump->um_lock);
571 error = ffs_realloccg(ip, -1 - nb,
572 dp->di_extb[nb],
573 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
574 flags, &dp->di_extb[0]),
575 osize,
576 (int)fs->fs_bsize, cred, &bp);
577 if (error)
578 return (error);
579 dp->di_extsize = smalllblktosize(fs, nb + 1);
580 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
581 bp->b_xflags |= BX_ALTDATA;
582 ip->i_flag |= IN_CHANGE | IN_UPDATE;
583 if (flags & IO_SYNC)
584 bwrite(bp);
585 else
586 bawrite(bp);
590 * All blocks are direct blocks
592 if (flags & BA_METAONLY)
593 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
594 nb = dp->di_extb[lbn];
595 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
596 error = bread(vp, -1 - lbn, fs->fs_bsize,
597 NOCRED, 0, &bp);
598 if (error) {
599 brelse(bp, 0);
600 return (error);
602 mutex_enter(&bp->b_interlock);
603 bp->b_blkno = fsbtodb(fs, nb);
604 bp->b_xflags |= BX_ALTDATA;
605 mutex_exit(&bp->b_interlock);
606 *bpp = bp;
607 return (0);
609 if (nb != 0) {
611 * Consider need to reallocate a fragment.
613 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
614 nsize = fragroundup(fs, size);
615 if (nsize <= osize) {
616 error = bread(vp, -1 - lbn, osize,
617 NOCRED, 0, &bp);
618 if (error) {
619 brelse(bp, 0);
620 return (error);
622 mutex_enter(&bp->b_interlock);
623 bp->b_blkno = fsbtodb(fs, nb);
624 bp->b_xflags |= BX_ALTDATA;
625 mutex_exit(&bp->b_interlock);
626 } else {
627 mutex_enter(&ump->um_lock);
628 error = ffs_realloccg(ip, -1 - lbn,
629 dp->di_extb[lbn],
630 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
631 &dp->di_extb[0]),
632 osize, nsize, cred, &bp);
633 if (error)
634 return (error);
635 bp->b_xflags |= BX_ALTDATA;
637 } else {
638 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
639 nsize = fragroundup(fs, size);
640 else
641 nsize = fs->fs_bsize;
642 mutex_enter(&ump->um_lock);
643 error = ffs_alloc(ip, lbn,
644 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
645 &dp->di_extb[0]),
646 nsize, flags, cred, &newb);
647 if (error)
648 return (error);
649 error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
650 nsize, (flags & BA_CLRBUF) != 0, &bp);
651 if (error)
652 return error;
653 bp->b_xflags |= BX_ALTDATA;
655 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
656 ip->i_flag |= IN_CHANGE | IN_UPDATE;
657 *bpp = bp;
658 return (0);
660 #endif
662 * If the next write will extend the file into a new block,
663 * and the file is currently composed of a fragment
664 * this fragment has to be extended to be a full block.
667 lastlbn = lblkno(fs, ip->i_size);
668 if (lastlbn < NDADDR && lastlbn < lbn) {
669 nb = lastlbn;
670 osize = blksize(fs, ip, nb);
671 if (osize < fs->fs_bsize && osize > 0) {
672 mutex_enter(&ump->um_lock);
673 error = ffs_realloccg(ip, nb,
674 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
675 &ip->i_ffs2_db[0]),
676 osize, (int)fs->fs_bsize, cred, bpp, &newb);
677 if (error)
678 return (error);
679 ip->i_size = lblktosize(fs, nb + 1);
680 ip->i_ffs2_size = ip->i_size;
681 uvm_vnp_setsize(vp, ip->i_size);
682 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
683 ip->i_flag |= IN_CHANGE | IN_UPDATE;
684 if (bpp) {
685 if (flags & B_SYNC)
686 bwrite(*bpp);
687 else
688 bawrite(*bpp);
694 * The first NDADDR blocks are direct blocks
697 if (lbn < NDADDR) {
698 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
699 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
702 * The block is an already-allocated direct block
703 * and the file already extends past this block,
704 * thus this must be a whole block.
705 * Just read the block (if requested).
708 if (bpp != NULL) {
709 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
710 B_MODIFY, bpp);
711 if (error) {
712 brelse(*bpp, 0);
713 return (error);
716 return (0);
718 if (nb != 0) {
721 * Consider need to reallocate a fragment.
724 osize = fragroundup(fs, blkoff(fs, ip->i_size));
725 nsize = fragroundup(fs, size);
726 if (nsize <= osize) {
729 * The existing block is already
730 * at least as big as we want.
731 * Just read the block (if requested).
734 if (bpp != NULL) {
735 error = bread(vp, lbn, osize, NOCRED,
736 B_MODIFY, bpp);
737 if (error) {
738 brelse(*bpp, 0);
739 return (error);
742 return 0;
743 } else {
746 * The existing block is smaller than we want,
747 * grow it.
749 mutex_enter(&ump->um_lock);
750 error = ffs_realloccg(ip, lbn,
751 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
752 &ip->i_ffs2_db[0]),
753 osize, nsize, cred, bpp, &newb);
754 if (error)
755 return (error);
757 } else {
760 * the block was not previously allocated,
761 * allocate a new block or fragment.
764 if (ip->i_size < lblktosize(fs, lbn + 1))
765 nsize = fragroundup(fs, size);
766 else
767 nsize = fs->fs_bsize;
768 mutex_enter(&ump->um_lock);
769 error = ffs_alloc(ip, lbn,
770 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
771 &ip->i_ffs2_db[0]),
772 nsize, flags, cred, &newb);
773 if (error)
774 return (error);
775 if (bpp != NULL) {
776 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
777 nsize, (flags & B_CLRBUF) != 0, bpp);
778 if (error)
779 return error;
782 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
783 ip->i_flag |= IN_CHANGE | IN_UPDATE;
784 return (0);
788 * Determine the number of levels of indirection.
791 pref = 0;
792 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
793 return (error);
796 * Fetch the first indirect block allocating if necessary.
799 --num;
800 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
801 allocib = NULL;
802 allocblk = allociblk;
803 if (nb == 0) {
804 mutex_enter(&ump->um_lock);
805 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
806 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
807 flags | B_METAONLY, cred, &newb);
808 if (error)
809 goto fail;
810 nb = newb;
811 *allocblk++ = nb;
812 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
813 fs->fs_bsize, true, &bp);
814 if (error)
815 goto fail;
817 * Write synchronously so that indirect blocks
818 * never point at garbage.
820 if ((error = bwrite(bp)) != 0)
821 goto fail;
822 unwindidx = 0;
823 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
824 *allocib = ufs_rw64(nb, needswap);
825 ip->i_flag |= IN_CHANGE | IN_UPDATE;
829 * Fetch through the indirect blocks, allocating as necessary.
832 for (i = 1;;) {
833 error = bread(vp,
834 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
835 if (error) {
836 brelse(bp, 0);
837 goto fail;
839 bap = (int64_t *)bp->b_data;
840 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
841 if (i == num)
842 break;
843 i++;
844 if (nb != 0) {
845 brelse(bp, 0);
846 continue;
848 if (fscow_run(bp, true) != 0) {
849 brelse(bp, 0);
850 goto fail;
852 mutex_enter(&ump->um_lock);
853 /* Try to keep snapshot indirect blocks contiguous. */
854 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
855 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
856 flags | B_METAONLY, &bap[0]);
857 if (pref == 0)
858 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
859 NULL);
860 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
861 flags | B_METAONLY, cred, &newb);
862 if (error) {
863 brelse(bp, 0);
864 goto fail;
866 nb = newb;
867 *allocblk++ = nb;
868 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
869 fs->fs_bsize, true, &nbp);
870 if (error) {
871 brelse(bp, 0);
872 goto fail;
875 * Write synchronously so that indirect blocks
876 * never point at garbage.
878 if ((error = bwrite(nbp)) != 0) {
879 brelse(bp, 0);
880 goto fail;
882 if (unwindidx < 0)
883 unwindidx = i - 1;
884 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
887 * If required, write synchronously, otherwise use
888 * delayed write.
891 if (flags & B_SYNC) {
892 bwrite(bp);
893 } else {
894 bdwrite(bp);
898 if (flags & B_METAONLY) {
899 KASSERT(bpp != NULL);
900 *bpp = bp;
901 return (0);
905 * Get the data block, allocating if necessary.
908 if (nb == 0) {
909 if (fscow_run(bp, true) != 0) {
910 brelse(bp, 0);
911 goto fail;
913 mutex_enter(&ump->um_lock);
914 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
915 &bap[0]);
916 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
917 &newb);
918 if (error) {
919 brelse(bp, 0);
920 goto fail;
922 nb = newb;
923 *allocblk++ = nb;
924 if (bpp != NULL) {
925 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
926 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
927 if (error) {
928 brelse(bp, 0);
929 goto fail;
932 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
933 if (allocib == NULL && unwindidx < 0) {
934 unwindidx = i - 1;
938 * If required, write synchronously, otherwise use
939 * delayed write.
942 if (flags & B_SYNC) {
943 bwrite(bp);
944 } else {
945 bdwrite(bp);
947 return (0);
949 brelse(bp, 0);
950 if (bpp != NULL) {
951 if (flags & B_CLRBUF) {
952 error = bread(vp, lbn, (int)fs->fs_bsize,
953 NOCRED, B_MODIFY, &nbp);
954 if (error) {
955 brelse(nbp, 0);
956 goto fail;
958 } else {
959 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
960 fs->fs_bsize, true, &nbp);
961 if (error)
962 goto fail;
964 *bpp = nbp;
966 return (0);
968 fail:
970 * If we have failed part way through block allocation, we
971 * have to deallocate any indirect blocks that we have allocated.
974 if (unwindidx >= 0) {
977 * First write out any buffers we've created to resolve their
978 * softdeps. This must be done in reverse order of creation
979 * so that we resolve the dependencies in one pass.
980 * Write the cylinder group buffers for these buffers too.
983 for (i = num; i >= unwindidx; i--) {
984 if (i == 0) {
985 break;
987 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
988 fs->fs_bsize, false, &bp) != 0)
989 continue;
990 if (bp->b_oflags & BO_DELWRI) {
991 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
992 dbtofsb(fs, bp->b_blkno))));
993 bwrite(bp);
994 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
995 fs->fs_cgsize, false, &bp) != 0)
996 continue;
997 if (bp->b_oflags & BO_DELWRI) {
998 bwrite(bp);
999 } else {
1000 brelse(bp, BC_INVAL);
1002 } else {
1003 brelse(bp, BC_INVAL);
1008 * Now that any dependencies that we created have been
1009 * resolved, we can undo the partial allocation.
1012 if (unwindidx == 0) {
1013 *allocib = 0;
1014 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1015 } else {
1016 int r;
1018 r = bread(vp, indirs[unwindidx].in_lbn,
1019 (int)fs->fs_bsize, NOCRED, 0, &bp);
1020 if (r) {
1021 panic("Could not unwind indirect block, error %d", r);
1022 brelse(bp, 0);
1023 } else {
1024 bap = (int64_t *)bp->b_data;
1025 bap[indirs[unwindidx].in_off] = 0;
1026 bwrite(bp);
1029 for (i = unwindidx + 1; i <= num; i++) {
1030 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1031 fs->fs_bsize, false, &bp) == 0)
1032 brelse(bp, BC_INVAL);
1035 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1036 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1037 deallocated += fs->fs_bsize;
1039 if (deallocated) {
1040 #if defined(QUOTA) || defined(QUOTA2)
1042 * Restore user's disk quota because allocation failed.
1044 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1045 #endif
1046 ip->i_ffs2_blocks -= btodb(deallocated);
1047 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1050 return (error);