Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / ufs / ffs / ffs_balloc.c
blob025d8d4fa25921844285bb72bd2f26fbd293655d
1 /* $NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $ */
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51 2008/07/31 05:38:06 simonb Exp $");
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
68 #include <uvm/uvm.h>
/*
 * Per-format workers for ffs_balloc(), which selects one of them by
 * comparing the superblock magic against FS_UFS2_MAGIC.
 */
static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
85 int error;
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
95 return error;
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 #ifdef FFS_EI
115 const int needswap = UFS_FSNEEDSWAP(fs);
116 #endif
117 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
119 lbn = lblkno(fs, off);
120 size = blkoff(fs, off) + size;
121 if (size > fs->fs_bsize)
122 panic("ffs_balloc: blk too big");
123 if (bpp != NULL) {
124 *bpp = NULL;
126 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
128 if (lbn < 0)
129 return (EFBIG);
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
137 lastlbn = lblkno(fs, ip->i_size);
138 if (lastlbn < NDADDR && lastlbn < lbn) {
139 nb = lastlbn;
140 osize = blksize(fs, ip, nb);
141 if (osize < fs->fs_bsize && osize > 0) {
142 mutex_enter(&ump->um_lock);
143 error = ffs_realloccg(ip, nb,
144 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
145 &ip->i_ffs1_db[0]),
146 osize, (int)fs->fs_bsize, cred, bpp, &newb);
147 if (error)
148 return (error);
149 ip->i_size = lblktosize(fs, nb + 1);
150 ip->i_ffs1_size = ip->i_size;
151 uvm_vnp_setsize(vp, ip->i_ffs1_size);
152 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
153 ip->i_flag |= IN_CHANGE | IN_UPDATE;
154 if (bpp && *bpp) {
155 if (flags & B_SYNC)
156 bwrite(*bpp);
157 else
158 bawrite(*bpp);
164 * The first NDADDR blocks are direct blocks
167 if (lbn < NDADDR) {
168 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
169 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
172 * The block is an already-allocated direct block
173 * and the file already extends past this block,
174 * thus this must be a whole block.
175 * Just read the block (if requested).
178 if (bpp != NULL) {
179 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
180 B_MODIFY, bpp);
181 if (error) {
182 brelse(*bpp, 0);
183 return (error);
186 return (0);
188 if (nb != 0) {
191 * Consider need to reallocate a fragment.
194 osize = fragroundup(fs, blkoff(fs, ip->i_size));
195 nsize = fragroundup(fs, size);
196 if (nsize <= osize) {
199 * The existing block is already
200 * at least as big as we want.
201 * Just read the block (if requested).
204 if (bpp != NULL) {
205 error = bread(vp, lbn, osize, NOCRED,
206 B_MODIFY, bpp);
207 if (error) {
208 brelse(*bpp, 0);
209 return (error);
212 return 0;
213 } else {
216 * The existing block is smaller than we want,
217 * grow it.
219 mutex_enter(&ump->um_lock);
220 error = ffs_realloccg(ip, lbn,
221 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
222 &ip->i_ffs1_db[0]),
223 osize, nsize, cred, bpp, &newb);
224 if (error)
225 return (error);
227 } else {
230 * the block was not previously allocated,
231 * allocate a new block or fragment.
234 if (ip->i_size < lblktosize(fs, lbn + 1))
235 nsize = fragroundup(fs, size);
236 else
237 nsize = fs->fs_bsize;
238 mutex_enter(&ump->um_lock);
239 error = ffs_alloc(ip, lbn,
240 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
241 &ip->i_ffs1_db[0]),
242 nsize, flags, cred, &newb);
243 if (error)
244 return (error);
245 if (bpp != NULL) {
246 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
247 nsize, (flags & B_CLRBUF) != 0, bpp);
248 if (error)
249 return error;
252 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
253 ip->i_flag |= IN_CHANGE | IN_UPDATE;
254 return (0);
258 * Determine the number of levels of indirection.
261 pref = 0;
262 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
263 return (error);
266 * Fetch the first indirect block allocating if necessary.
269 --num;
270 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
271 allocib = NULL;
272 allocblk = allociblk;
273 if (nb == 0) {
274 mutex_enter(&ump->um_lock);
275 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
276 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
277 flags | B_METAONLY, cred, &newb);
278 if (error)
279 goto fail;
280 nb = newb;
281 *allocblk++ = nb;
282 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
283 fs->fs_bsize, true, &bp);
284 if (error)
285 goto fail;
287 * Write synchronously so that indirect blocks
288 * never point at garbage.
290 if ((error = bwrite(bp)) != 0)
291 goto fail;
292 unwindidx = 0;
293 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
294 *allocib = ufs_rw32(nb, needswap);
295 ip->i_flag |= IN_CHANGE | IN_UPDATE;
299 * Fetch through the indirect blocks, allocating as necessary.
302 for (i = 1;;) {
303 error = bread(vp,
304 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
305 if (error) {
306 brelse(bp, 0);
307 goto fail;
309 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
310 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
311 if (i == num)
312 break;
313 i++;
314 if (nb != 0) {
315 brelse(bp, 0);
316 continue;
318 if (fscow_run(bp, true) != 0) {
319 brelse(bp, 0);
320 goto fail;
322 mutex_enter(&ump->um_lock);
323 if (pref == 0)
324 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
325 NULL);
326 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
327 flags | B_METAONLY, cred, &newb);
328 if (error) {
329 brelse(bp, 0);
330 goto fail;
332 nb = newb;
333 *allocblk++ = nb;
334 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
335 fs->fs_bsize, true, &nbp);
336 if (error) {
337 brelse(bp, 0);
338 goto fail;
341 * Write synchronously so that indirect blocks
342 * never point at garbage.
344 if ((error = bwrite(nbp)) != 0) {
345 brelse(bp, 0);
346 goto fail;
348 if (unwindidx < 0)
349 unwindidx = i - 1;
350 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
353 * If required, write synchronously, otherwise use
354 * delayed write.
357 if (flags & B_SYNC) {
358 bwrite(bp);
359 } else {
360 bdwrite(bp);
364 if (flags & B_METAONLY) {
365 KASSERT(bpp != NULL);
366 *bpp = bp;
367 return (0);
371 * Get the data block, allocating if necessary.
374 if (nb == 0) {
375 if (fscow_run(bp, true) != 0) {
376 brelse(bp, 0);
377 goto fail;
379 mutex_enter(&ump->um_lock);
380 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
381 &bap[0]);
382 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
383 &newb);
384 if (error) {
385 brelse(bp, 0);
386 goto fail;
388 nb = newb;
389 *allocblk++ = nb;
390 if (bpp != NULL) {
391 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
392 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
393 if (error) {
394 brelse(bp, 0);
395 goto fail;
398 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
399 if (allocib == NULL && unwindidx < 0) {
400 unwindidx = i - 1;
404 * If required, write synchronously, otherwise use
405 * delayed write.
408 if (flags & B_SYNC) {
409 bwrite(bp);
410 } else {
411 bdwrite(bp);
413 return (0);
415 brelse(bp, 0);
416 if (bpp != NULL) {
417 if (flags & B_CLRBUF) {
418 error = bread(vp, lbn, (int)fs->fs_bsize,
419 NOCRED, B_MODIFY, &nbp);
420 if (error) {
421 brelse(nbp, 0);
422 goto fail;
424 } else {
425 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
426 fs->fs_bsize, true, &nbp);
427 if (error)
428 goto fail;
430 *bpp = nbp;
432 return (0);
434 fail:
436 * If we have failed part way through block allocation, we
437 * have to deallocate any indirect blocks that we have allocated.
440 if (unwindidx >= 0) {
443 * First write out any buffers we've created to resolve their
444 * softdeps. This must be done in reverse order of creation
445 * so that we resolve the dependencies in one pass.
446 * Write the cylinder group buffers for these buffers too.
449 for (i = num; i >= unwindidx; i--) {
450 if (i == 0) {
451 break;
453 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
454 fs->fs_bsize, false, &bp) != 0)
455 continue;
456 if (bp->b_oflags & BO_DELWRI) {
457 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
458 dbtofsb(fs, bp->b_blkno))));
459 bwrite(bp);
460 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
461 fs->fs_cgsize, false, &bp) != 0)
462 continue;
463 if (bp->b_oflags & BO_DELWRI) {
464 bwrite(bp);
465 } else {
466 brelse(bp, BC_INVAL);
468 } else {
469 brelse(bp, BC_INVAL);
474 * Undo the partial allocation.
476 if (unwindidx == 0) {
477 *allocib = 0;
478 ip->i_flag |= IN_CHANGE | IN_UPDATE;
479 } else {
480 int r;
482 r = bread(vp, indirs[unwindidx].in_lbn,
483 (int)fs->fs_bsize, NOCRED, 0, &bp);
484 if (r) {
485 panic("Could not unwind indirect block, error %d", r);
486 brelse(bp, 0);
487 } else {
488 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
489 bap[indirs[unwindidx].in_off] = 0;
490 bwrite(bp);
493 for (i = unwindidx + 1; i <= num; i++) {
494 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
495 fs->fs_bsize, false, &bp) == 0)
496 brelse(bp, BC_INVAL);
499 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
500 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
501 deallocated += fs->fs_bsize;
503 if (deallocated) {
504 #ifdef QUOTA
506 * Restore user's disk quota because allocation failed.
508 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
509 #endif
510 ip->i_ffs1_blocks -= btodb(deallocated);
511 ip->i_flag |= IN_CHANGE | IN_UPDATE;
513 return (error);
516 static int
517 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
518 int flags, struct buf **bpp)
520 daddr_t lbn, lastlbn;
521 struct buf *bp, *nbp;
522 struct inode *ip = VTOI(vp);
523 struct fs *fs = ip->i_fs;
524 struct ufsmount *ump = ip->i_ump;
525 struct indir indirs[NIADDR + 2];
526 daddr_t newb, pref, nb;
527 int64_t *bap;
528 int deallocated, osize, nsize, num, i, error;
529 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
530 int64_t *allocib;
531 int unwindidx = -1;
532 #ifdef FFS_EI
533 const int needswap = UFS_FSNEEDSWAP(fs);
534 #endif
535 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
537 lbn = lblkno(fs, off);
538 size = blkoff(fs, off) + size;
539 if (size > fs->fs_bsize)
540 panic("ffs_balloc: blk too big");
541 if (bpp != NULL) {
542 *bpp = NULL;
544 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
546 if (lbn < 0)
547 return (EFBIG);
549 #ifdef notyet
551 * Check for allocating external data.
553 if (flags & IO_EXT) {
554 if (lbn >= NXADDR)
555 return (EFBIG);
557 * If the next write will extend the data into a new block,
558 * and the data is currently composed of a fragment
559 * this fragment has to be extended to be a full block.
561 lastlbn = lblkno(fs, dp->di_extsize);
562 if (lastlbn < lbn) {
563 nb = lastlbn;
564 osize = sblksize(fs, dp->di_extsize, nb);
565 if (osize < fs->fs_bsize && osize > 0) {
566 mutex_enter(&ump->um_lock);
567 error = ffs_realloccg(ip, -1 - nb,
568 dp->di_extb[nb],
569 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
570 flags, &dp->di_extb[0]),
571 osize,
572 (int)fs->fs_bsize, cred, &bp);
573 if (error)
574 return (error);
575 dp->di_extsize = smalllblktosize(fs, nb + 1);
576 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
577 bp->b_xflags |= BX_ALTDATA;
578 ip->i_flag |= IN_CHANGE | IN_UPDATE;
579 if (flags & IO_SYNC)
580 bwrite(bp);
581 else
582 bawrite(bp);
586 * All blocks are direct blocks
588 if (flags & BA_METAONLY)
589 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
590 nb = dp->di_extb[lbn];
591 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
592 error = bread(vp, -1 - lbn, fs->fs_bsize,
593 NOCRED, 0, &bp);
594 if (error) {
595 brelse(bp, 0);
596 return (error);
598 mutex_enter(&bp->b_interlock);
599 bp->b_blkno = fsbtodb(fs, nb);
600 bp->b_xflags |= BX_ALTDATA;
601 mutex_exit(&bp->b_interlock);
602 *bpp = bp;
603 return (0);
605 if (nb != 0) {
607 * Consider need to reallocate a fragment.
609 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
610 nsize = fragroundup(fs, size);
611 if (nsize <= osize) {
612 error = bread(vp, -1 - lbn, osize,
613 NOCRED, 0, &bp);
614 if (error) {
615 brelse(bp, 0);
616 return (error);
618 mutex_enter(&bp->b_interlock);
619 bp->b_blkno = fsbtodb(fs, nb);
620 bp->b_xflags |= BX_ALTDATA;
621 mutex_exit(&bp->b_interlock);
622 } else {
623 mutex_enter(&ump->um_lock);
624 error = ffs_realloccg(ip, -1 - lbn,
625 dp->di_extb[lbn],
626 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
627 &dp->di_extb[0]),
628 osize, nsize, cred, &bp);
629 if (error)
630 return (error);
631 bp->b_xflags |= BX_ALTDATA;
633 } else {
634 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
635 nsize = fragroundup(fs, size);
636 else
637 nsize = fs->fs_bsize;
638 mutex_enter(&ump->um_lock);
639 error = ffs_alloc(ip, lbn,
640 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
641 &dp->di_extb[0]),
642 nsize, flags, cred, &newb);
643 if (error)
644 return (error);
645 error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
646 nsize, (flags & BA_CLRBUF) != 0, &bp);
647 if (error)
648 return error;
649 bp->b_xflags |= BX_ALTDATA;
651 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
652 ip->i_flag |= IN_CHANGE | IN_UPDATE;
653 *bpp = bp;
654 return (0);
656 #endif
658 * If the next write will extend the file into a new block,
659 * and the file is currently composed of a fragment
660 * this fragment has to be extended to be a full block.
663 lastlbn = lblkno(fs, ip->i_size);
664 if (lastlbn < NDADDR && lastlbn < lbn) {
665 nb = lastlbn;
666 osize = blksize(fs, ip, nb);
667 if (osize < fs->fs_bsize && osize > 0) {
668 mutex_enter(&ump->um_lock);
669 error = ffs_realloccg(ip, nb,
670 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
671 &ip->i_ffs2_db[0]),
672 osize, (int)fs->fs_bsize, cred, bpp, &newb);
673 if (error)
674 return (error);
675 ip->i_size = lblktosize(fs, nb + 1);
676 ip->i_ffs2_size = ip->i_size;
677 uvm_vnp_setsize(vp, ip->i_size);
678 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
679 ip->i_flag |= IN_CHANGE | IN_UPDATE;
680 if (bpp) {
681 if (flags & B_SYNC)
682 bwrite(*bpp);
683 else
684 bawrite(*bpp);
690 * The first NDADDR blocks are direct blocks
693 if (lbn < NDADDR) {
694 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
695 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
698 * The block is an already-allocated direct block
699 * and the file already extends past this block,
700 * thus this must be a whole block.
701 * Just read the block (if requested).
704 if (bpp != NULL) {
705 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
706 B_MODIFY, bpp);
707 if (error) {
708 brelse(*bpp, 0);
709 return (error);
712 return (0);
714 if (nb != 0) {
717 * Consider need to reallocate a fragment.
720 osize = fragroundup(fs, blkoff(fs, ip->i_size));
721 nsize = fragroundup(fs, size);
722 if (nsize <= osize) {
725 * The existing block is already
726 * at least as big as we want.
727 * Just read the block (if requested).
730 if (bpp != NULL) {
731 error = bread(vp, lbn, osize, NOCRED,
732 B_MODIFY, bpp);
733 if (error) {
734 brelse(*bpp, 0);
735 return (error);
738 return 0;
739 } else {
742 * The existing block is smaller than we want,
743 * grow it.
745 mutex_enter(&ump->um_lock);
746 error = ffs_realloccg(ip, lbn,
747 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
748 &ip->i_ffs2_db[0]),
749 osize, nsize, cred, bpp, &newb);
750 if (error)
751 return (error);
753 } else {
756 * the block was not previously allocated,
757 * allocate a new block or fragment.
760 if (ip->i_size < lblktosize(fs, lbn + 1))
761 nsize = fragroundup(fs, size);
762 else
763 nsize = fs->fs_bsize;
764 mutex_enter(&ump->um_lock);
765 error = ffs_alloc(ip, lbn,
766 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
767 &ip->i_ffs2_db[0]),
768 nsize, flags, cred, &newb);
769 if (error)
770 return (error);
771 if (bpp != NULL) {
772 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
773 nsize, (flags & B_CLRBUF) != 0, bpp);
774 if (error)
775 return error;
778 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
779 ip->i_flag |= IN_CHANGE | IN_UPDATE;
780 return (0);
784 * Determine the number of levels of indirection.
787 pref = 0;
788 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
789 return (error);
792 * Fetch the first indirect block allocating if necessary.
795 --num;
796 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
797 allocib = NULL;
798 allocblk = allociblk;
799 if (nb == 0) {
800 mutex_enter(&ump->um_lock);
801 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
802 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
803 flags | B_METAONLY, cred, &newb);
804 if (error)
805 goto fail;
806 nb = newb;
807 *allocblk++ = nb;
808 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
809 fs->fs_bsize, true, &bp);
810 if (error)
811 goto fail;
813 * Write synchronously so that indirect blocks
814 * never point at garbage.
816 if ((error = bwrite(bp)) != 0)
817 goto fail;
818 unwindidx = 0;
819 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
820 *allocib = ufs_rw64(nb, needswap);
821 ip->i_flag |= IN_CHANGE | IN_UPDATE;
825 * Fetch through the indirect blocks, allocating as necessary.
828 for (i = 1;;) {
829 error = bread(vp,
830 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
831 if (error) {
832 brelse(bp, 0);
833 goto fail;
835 bap = (int64_t *)bp->b_data;
836 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
837 if (i == num)
838 break;
839 i++;
840 if (nb != 0) {
841 brelse(bp, 0);
842 continue;
844 if (fscow_run(bp, true) != 0) {
845 brelse(bp, 0);
846 goto fail;
848 mutex_enter(&ump->um_lock);
849 if (pref == 0)
850 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
851 NULL);
852 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
853 flags | B_METAONLY, cred, &newb);
854 if (error) {
855 brelse(bp, 0);
856 goto fail;
858 nb = newb;
859 *allocblk++ = nb;
860 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
861 fs->fs_bsize, true, &nbp);
862 if (error) {
863 brelse(bp, 0);
864 goto fail;
867 * Write synchronously so that indirect blocks
868 * never point at garbage.
870 if ((error = bwrite(nbp)) != 0) {
871 brelse(bp, 0);
872 goto fail;
874 if (unwindidx < 0)
875 unwindidx = i - 1;
876 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
879 * If required, write synchronously, otherwise use
880 * delayed write.
883 if (flags & B_SYNC) {
884 bwrite(bp);
885 } else {
886 bdwrite(bp);
890 if (flags & B_METAONLY) {
891 KASSERT(bpp != NULL);
892 *bpp = bp;
893 return (0);
897 * Get the data block, allocating if necessary.
900 if (nb == 0) {
901 if (fscow_run(bp, true) != 0) {
902 brelse(bp, 0);
903 goto fail;
905 mutex_enter(&ump->um_lock);
906 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
907 &bap[0]);
908 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
909 &newb);
910 if (error) {
911 brelse(bp, 0);
912 goto fail;
914 nb = newb;
915 *allocblk++ = nb;
916 if (bpp != NULL) {
917 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
918 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
919 if (error) {
920 brelse(bp, 0);
921 goto fail;
924 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
925 if (allocib == NULL && unwindidx < 0) {
926 unwindidx = i - 1;
930 * If required, write synchronously, otherwise use
931 * delayed write.
934 if (flags & B_SYNC) {
935 bwrite(bp);
936 } else {
937 bdwrite(bp);
939 return (0);
941 brelse(bp, 0);
942 if (bpp != NULL) {
943 if (flags & B_CLRBUF) {
944 error = bread(vp, lbn, (int)fs->fs_bsize,
945 NOCRED, B_MODIFY, &nbp);
946 if (error) {
947 brelse(nbp, 0);
948 goto fail;
950 } else {
951 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
952 fs->fs_bsize, true, &nbp);
953 if (error)
954 goto fail;
956 *bpp = nbp;
958 return (0);
960 fail:
962 * If we have failed part way through block allocation, we
963 * have to deallocate any indirect blocks that we have allocated.
966 if (unwindidx >= 0) {
969 * First write out any buffers we've created to resolve their
970 * softdeps. This must be done in reverse order of creation
971 * so that we resolve the dependencies in one pass.
972 * Write the cylinder group buffers for these buffers too.
975 for (i = num; i >= unwindidx; i--) {
976 if (i == 0) {
977 break;
979 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
980 fs->fs_bsize, false, &bp) != 0)
981 continue;
982 if (bp->b_oflags & BO_DELWRI) {
983 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
984 dbtofsb(fs, bp->b_blkno))));
985 bwrite(bp);
986 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
987 fs->fs_cgsize, false, &bp) != 0)
988 continue;
989 if (bp->b_oflags & BO_DELWRI) {
990 bwrite(bp);
991 } else {
992 brelse(bp, BC_INVAL);
994 } else {
995 brelse(bp, BC_INVAL);
1000 * Now that any dependencies that we created have been
1001 * resolved, we can undo the partial allocation.
1004 if (unwindidx == 0) {
1005 *allocib = 0;
1006 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1007 } else {
1008 int r;
1010 r = bread(vp, indirs[unwindidx].in_lbn,
1011 (int)fs->fs_bsize, NOCRED, 0, &bp);
1012 if (r) {
1013 panic("Could not unwind indirect block, error %d", r);
1014 brelse(bp, 0);
1015 } else {
1016 bap = (int64_t *)bp->b_data;
1017 bap[indirs[unwindidx].in_off] = 0;
1018 bwrite(bp);
1021 for (i = unwindidx + 1; i <= num; i++) {
1022 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1023 fs->fs_bsize, false, &bp) == 0)
1024 brelse(bp, BC_INVAL);
1027 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1028 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1029 deallocated += fs->fs_bsize;
1031 if (deallocated) {
1032 #ifdef QUOTA
1034 * Restore user's disk quota because allocation failed.
1036 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1037 #endif
1038 ip->i_ffs2_blocks -= btodb(deallocated);
1039 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1042 return (error);