Sync with cat.c from netbsd-8
[minix3.git] / sys / ufs / ffs / ffs_balloc.c
blob0827fbd8cd13ea9989158b92ad63faf85f4028a0
1 /* $NetBSD: ffs_balloc.c,v 1.61 2015/03/28 19:24:04 maxv Exp $ */
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.61 2015/03/28 19:24:04 maxv Exp $");
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
68 #include <uvm/uvm.h>
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
85 int error;
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
95 return error;
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[UFS_NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 const int needswap = UFS_FSNEEDSWAP(fs);
115 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
117 lbn = ffs_lblkno(fs, off);
118 size = ffs_blkoff(fs, off) + size;
119 if (size > fs->fs_bsize)
120 panic("ffs_balloc: blk too big");
121 if (bpp != NULL) {
122 *bpp = NULL;
124 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
126 if (lbn < 0)
127 return (EFBIG);
130 * If the next write will extend the file into a new block,
131 * and the file is currently composed of a fragment
132 * this fragment has to be extended to be a full block.
135 lastlbn = ffs_lblkno(fs, ip->i_size);
136 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
137 nb = lastlbn;
138 osize = ffs_blksize(fs, ip, nb);
139 if (osize < fs->fs_bsize && osize > 0) {
140 mutex_enter(&ump->um_lock);
141 error = ffs_realloccg(ip, nb,
142 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
143 &ip->i_ffs1_db[0]),
144 osize, (int)fs->fs_bsize, cred, bpp, &newb);
145 if (error)
146 return (error);
147 ip->i_size = ffs_lblktosize(fs, nb + 1);
148 ip->i_ffs1_size = ip->i_size;
149 uvm_vnp_setsize(vp, ip->i_ffs1_size);
150 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
151 ip->i_flag |= IN_CHANGE | IN_UPDATE;
152 if (bpp && *bpp) {
153 if (flags & B_SYNC)
154 bwrite(*bpp);
155 else
156 bawrite(*bpp);
162 * The first UFS_NDADDR blocks are direct blocks
165 if (lbn < UFS_NDADDR) {
166 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
167 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
170 * The block is an already-allocated direct block
171 * and the file already extends past this block,
172 * thus this must be a whole block.
173 * Just read the block (if requested).
176 if (bpp != NULL) {
177 error = bread(vp, lbn, fs->fs_bsize,
178 B_MODIFY, bpp);
179 if (error) {
180 return (error);
183 return (0);
185 if (nb != 0) {
188 * Consider need to reallocate a fragment.
191 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
192 nsize = ffs_fragroundup(fs, size);
193 if (nsize <= osize) {
196 * The existing block is already
197 * at least as big as we want.
198 * Just read the block (if requested).
201 if (bpp != NULL) {
202 error = bread(vp, lbn, osize,
203 B_MODIFY, bpp);
204 if (error) {
205 return (error);
208 return 0;
209 } else {
212 * The existing block is smaller than we want,
213 * grow it.
215 mutex_enter(&ump->um_lock);
216 error = ffs_realloccg(ip, lbn,
217 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
218 &ip->i_ffs1_db[0]),
219 osize, nsize, cred, bpp, &newb);
220 if (error)
221 return (error);
223 } else {
226 * the block was not previously allocated,
227 * allocate a new block or fragment.
230 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
231 nsize = ffs_fragroundup(fs, size);
232 else
233 nsize = fs->fs_bsize;
234 mutex_enter(&ump->um_lock);
235 error = ffs_alloc(ip, lbn,
236 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
237 &ip->i_ffs1_db[0]),
238 nsize, flags, cred, &newb);
239 if (error)
240 return (error);
241 if (bpp != NULL) {
242 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
243 nsize, (flags & B_CLRBUF) != 0, bpp);
244 if (error)
245 return error;
248 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
249 ip->i_flag |= IN_CHANGE | IN_UPDATE;
250 return (0);
254 * Determine the number of levels of indirection.
257 pref = 0;
258 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
259 return (error);
262 * Fetch the first indirect block allocating if necessary.
265 --num;
266 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
267 allocib = NULL;
268 allocblk = allociblk;
269 if (nb == 0) {
270 mutex_enter(&ump->um_lock);
271 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
272 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
273 flags | B_METAONLY, cred, &newb);
274 if (error)
275 goto fail;
276 nb = newb;
277 *allocblk++ = nb;
278 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
279 fs->fs_bsize, true, &bp);
280 if (error)
281 goto fail;
283 * Write synchronously so that indirect blocks
284 * never point at garbage.
286 if ((error = bwrite(bp)) != 0)
287 goto fail;
288 unwindidx = 0;
289 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
290 *allocib = ufs_rw32(nb, needswap);
291 ip->i_flag |= IN_CHANGE | IN_UPDATE;
295 * Fetch through the indirect blocks, allocating as necessary.
298 for (i = 1;;) {
299 error = bread(vp,
300 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
301 if (error) {
302 goto fail;
304 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
305 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
306 if (i == num)
307 break;
308 i++;
309 if (nb != 0) {
310 brelse(bp, 0);
311 continue;
313 if (fscow_run(bp, true) != 0) {
314 brelse(bp, 0);
315 goto fail;
317 mutex_enter(&ump->um_lock);
318 /* Try to keep snapshot indirect blocks contiguous. */
319 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
320 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
321 flags | B_METAONLY, &bap[0]);
322 if (pref == 0)
323 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
324 NULL);
325 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
326 flags | B_METAONLY, cred, &newb);
327 if (error) {
328 brelse(bp, 0);
329 goto fail;
331 nb = newb;
332 *allocblk++ = nb;
333 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
334 fs->fs_bsize, true, &nbp);
335 if (error) {
336 brelse(bp, 0);
337 goto fail;
340 * Write synchronously so that indirect blocks
341 * never point at garbage.
343 if ((error = bwrite(nbp)) != 0) {
344 brelse(bp, 0);
345 goto fail;
347 if (unwindidx < 0)
348 unwindidx = i - 1;
349 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
352 * If required, write synchronously, otherwise use
353 * delayed write.
356 if (flags & B_SYNC) {
357 bwrite(bp);
358 } else {
359 bdwrite(bp);
363 if (flags & B_METAONLY) {
364 KASSERT(bpp != NULL);
365 *bpp = bp;
366 return (0);
370 * Get the data block, allocating if necessary.
373 if (nb == 0) {
374 if (fscow_run(bp, true) != 0) {
375 brelse(bp, 0);
376 goto fail;
378 mutex_enter(&ump->um_lock);
379 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
380 &bap[0]);
381 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
382 &newb);
383 if (error) {
384 brelse(bp, 0);
385 goto fail;
387 nb = newb;
388 *allocblk++ = nb;
389 if (bpp != NULL) {
390 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
391 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
397 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
398 if (allocib == NULL && unwindidx < 0) {
399 unwindidx = i - 1;
403 * If required, write synchronously, otherwise use
404 * delayed write.
407 if (flags & B_SYNC) {
408 bwrite(bp);
409 } else {
410 bdwrite(bp);
412 return (0);
414 brelse(bp, 0);
415 if (bpp != NULL) {
416 if (flags & B_CLRBUF) {
417 error = bread(vp, lbn, (int)fs->fs_bsize,
418 B_MODIFY, &nbp);
419 if (error) {
420 goto fail;
422 } else {
423 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
424 fs->fs_bsize, true, &nbp);
425 if (error)
426 goto fail;
428 *bpp = nbp;
430 return (0);
432 fail:
434 * If we have failed part way through block allocation, we
435 * have to deallocate any indirect blocks that we have allocated.
438 if (unwindidx >= 0) {
441 * First write out any buffers we've created to resolve their
442 * softdeps. This must be done in reverse order of creation
443 * so that we resolve the dependencies in one pass.
444 * Write the cylinder group buffers for these buffers too.
447 for (i = num; i >= unwindidx; i--) {
448 if (i == 0) {
449 break;
451 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
452 fs->fs_bsize, false, &bp) != 0)
453 continue;
454 if (bp->b_oflags & BO_DELWRI) {
455 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
456 FFS_DBTOFSB(fs, bp->b_blkno))));
457 bwrite(bp);
458 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
459 fs->fs_cgsize, false, &bp) != 0)
460 continue;
461 if (bp->b_oflags & BO_DELWRI) {
462 bwrite(bp);
463 } else {
464 brelse(bp, BC_INVAL);
466 } else {
467 brelse(bp, BC_INVAL);
472 * Undo the partial allocation.
474 if (unwindidx == 0) {
475 *allocib = 0;
476 ip->i_flag |= IN_CHANGE | IN_UPDATE;
477 } else {
478 int r;
480 r = bread(vp, indirs[unwindidx].in_lbn,
481 (int)fs->fs_bsize, 0, &bp);
482 if (r) {
483 panic("Could not unwind indirect block, error %d", r);
484 } else {
485 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
486 bap[indirs[unwindidx].in_off] = 0;
487 bwrite(bp);
490 for (i = unwindidx + 1; i <= num; i++) {
491 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
492 fs->fs_bsize, false, &bp) == 0)
493 brelse(bp, BC_INVAL);
496 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
497 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
498 deallocated += fs->fs_bsize;
500 if (deallocated) {
501 #if defined(QUOTA) || defined(QUOTA2)
503 * Restore user's disk quota because allocation failed.
505 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
506 #endif
507 ip->i_ffs1_blocks -= btodb(deallocated);
508 ip->i_flag |= IN_CHANGE | IN_UPDATE;
510 return (error);
513 static int
514 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
515 int flags, struct buf **bpp)
517 daddr_t lbn, lastlbn;
518 struct buf *bp, *nbp;
519 struct inode *ip = VTOI(vp);
520 struct fs *fs = ip->i_fs;
521 struct ufsmount *ump = ip->i_ump;
522 struct indir indirs[UFS_NIADDR + 2];
523 daddr_t newb, pref, nb;
524 int64_t *bap;
525 int deallocated, osize, nsize, num, i, error;
526 daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
527 int64_t *allocib;
528 int unwindidx = -1;
529 const int needswap = UFS_FSNEEDSWAP(fs);
530 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
532 lbn = ffs_lblkno(fs, off);
533 size = ffs_blkoff(fs, off) + size;
534 if (size > fs->fs_bsize)
535 panic("ffs_balloc: blk too big");
536 if (bpp != NULL) {
537 *bpp = NULL;
539 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
541 if (lbn < 0)
542 return (EFBIG);
544 #ifdef notyet
546 * Check for allocating external data.
548 if (flags & IO_EXT) {
549 if (lbn >= UFS_NXADDR)
550 return (EFBIG);
552 * If the next write will extend the data into a new block,
553 * and the data is currently composed of a fragment
554 * this fragment has to be extended to be a full block.
556 lastlbn = ffs_lblkno(fs, dp->di_extsize);
557 if (lastlbn < lbn) {
558 nb = lastlbn;
559 osize = ffs_sblksize(fs, dp->di_extsize, nb);
560 if (osize < fs->fs_bsize && osize > 0) {
561 mutex_enter(&ump->um_lock);
562 error = ffs_realloccg(ip, -1 - nb,
563 dp->di_extb[nb],
564 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
565 flags, &dp->di_extb[0]),
566 osize,
567 (int)fs->fs_bsize, cred, &bp);
568 if (error)
569 return (error);
570 dp->di_extsize = smalllblktosize(fs, nb + 1);
571 dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
572 bp->b_xflags |= BX_ALTDATA;
573 ip->i_flag |= IN_CHANGE | IN_UPDATE;
574 if (flags & IO_SYNC)
575 bwrite(bp);
576 else
577 bawrite(bp);
581 * All blocks are direct blocks
583 if (flags & BA_METAONLY)
584 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
585 nb = dp->di_extb[lbn];
586 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
587 error = bread(vp, -1 - lbn, fs->fs_bsize,
588 0, &bp);
589 if (error) {
590 return (error);
592 mutex_enter(&bp->b_interlock);
593 bp->b_blkno = FFS_FSBTODB(fs, nb);
594 bp->b_xflags |= BX_ALTDATA;
595 mutex_exit(&bp->b_interlock);
596 *bpp = bp;
597 return (0);
599 if (nb != 0) {
601 * Consider need to reallocate a fragment.
603 osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
604 nsize = ffs_fragroundup(fs, size);
605 if (nsize <= osize) {
606 error = bread(vp, -1 - lbn, osize,
607 0, &bp);
608 if (error) {
609 return (error);
611 mutex_enter(&bp->b_interlock);
612 bp->b_blkno = FFS_FSBTODB(fs, nb);
613 bp->b_xflags |= BX_ALTDATA;
614 mutex_exit(&bp->b_interlock);
615 } else {
616 mutex_enter(&ump->um_lock);
617 error = ffs_realloccg(ip, -1 - lbn,
618 dp->di_extb[lbn],
619 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
620 &dp->di_extb[0]),
621 osize, nsize, cred, &bp);
622 if (error)
623 return (error);
624 bp->b_xflags |= BX_ALTDATA;
626 } else {
627 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
628 nsize = ffs_fragroundup(fs, size);
629 else
630 nsize = fs->fs_bsize;
631 mutex_enter(&ump->um_lock);
632 error = ffs_alloc(ip, lbn,
633 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
634 &dp->di_extb[0]),
635 nsize, flags, cred, &newb);
636 if (error)
637 return (error);
638 error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
639 nsize, (flags & BA_CLRBUF) != 0, &bp);
640 if (error)
641 return error;
642 bp->b_xflags |= BX_ALTDATA;
644 dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
645 ip->i_flag |= IN_CHANGE | IN_UPDATE;
646 *bpp = bp;
647 return (0);
649 #endif
651 * If the next write will extend the file into a new block,
652 * and the file is currently composed of a fragment
653 * this fragment has to be extended to be a full block.
656 lastlbn = ffs_lblkno(fs, ip->i_size);
657 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
658 nb = lastlbn;
659 osize = ffs_blksize(fs, ip, nb);
660 if (osize < fs->fs_bsize && osize > 0) {
661 mutex_enter(&ump->um_lock);
662 error = ffs_realloccg(ip, nb,
663 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
664 &ip->i_ffs2_db[0]),
665 osize, (int)fs->fs_bsize, cred, bpp, &newb);
666 if (error)
667 return (error);
668 ip->i_size = ffs_lblktosize(fs, nb + 1);
669 ip->i_ffs2_size = ip->i_size;
670 uvm_vnp_setsize(vp, ip->i_size);
671 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
672 ip->i_flag |= IN_CHANGE | IN_UPDATE;
673 if (bpp) {
674 if (flags & B_SYNC)
675 bwrite(*bpp);
676 else
677 bawrite(*bpp);
683 * The first UFS_NDADDR blocks are direct blocks
686 if (lbn < UFS_NDADDR) {
687 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
688 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
691 * The block is an already-allocated direct block
692 * and the file already extends past this block,
693 * thus this must be a whole block.
694 * Just read the block (if requested).
697 if (bpp != NULL) {
698 error = bread(vp, lbn, fs->fs_bsize,
699 B_MODIFY, bpp);
700 if (error) {
701 return (error);
704 return (0);
706 if (nb != 0) {
709 * Consider need to reallocate a fragment.
712 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
713 nsize = ffs_fragroundup(fs, size);
714 if (nsize <= osize) {
717 * The existing block is already
718 * at least as big as we want.
719 * Just read the block (if requested).
722 if (bpp != NULL) {
723 error = bread(vp, lbn, osize,
724 B_MODIFY, bpp);
725 if (error) {
726 return (error);
729 return 0;
730 } else {
733 * The existing block is smaller than we want,
734 * grow it.
736 mutex_enter(&ump->um_lock);
737 error = ffs_realloccg(ip, lbn,
738 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
739 &ip->i_ffs2_db[0]),
740 osize, nsize, cred, bpp, &newb);
741 if (error)
742 return (error);
744 } else {
747 * the block was not previously allocated,
748 * allocate a new block or fragment.
751 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
752 nsize = ffs_fragroundup(fs, size);
753 else
754 nsize = fs->fs_bsize;
755 mutex_enter(&ump->um_lock);
756 error = ffs_alloc(ip, lbn,
757 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
758 &ip->i_ffs2_db[0]),
759 nsize, flags, cred, &newb);
760 if (error)
761 return (error);
762 if (bpp != NULL) {
763 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
764 nsize, (flags & B_CLRBUF) != 0, bpp);
765 if (error)
766 return error;
769 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
770 ip->i_flag |= IN_CHANGE | IN_UPDATE;
771 return (0);
775 * Determine the number of levels of indirection.
778 pref = 0;
779 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
780 return (error);
783 * Fetch the first indirect block allocating if necessary.
786 --num;
787 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
788 allocib = NULL;
789 allocblk = allociblk;
790 if (nb == 0) {
791 mutex_enter(&ump->um_lock);
792 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
793 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
794 flags | B_METAONLY, cred, &newb);
795 if (error)
796 goto fail;
797 nb = newb;
798 *allocblk++ = nb;
799 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
800 fs->fs_bsize, true, &bp);
801 if (error)
802 goto fail;
804 * Write synchronously so that indirect blocks
805 * never point at garbage.
807 if ((error = bwrite(bp)) != 0)
808 goto fail;
809 unwindidx = 0;
810 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
811 *allocib = ufs_rw64(nb, needswap);
812 ip->i_flag |= IN_CHANGE | IN_UPDATE;
816 * Fetch through the indirect blocks, allocating as necessary.
819 for (i = 1;;) {
820 error = bread(vp,
821 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
822 if (error) {
823 goto fail;
825 bap = (int64_t *)bp->b_data;
826 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
827 if (i == num)
828 break;
829 i++;
830 if (nb != 0) {
831 brelse(bp, 0);
832 continue;
834 if (fscow_run(bp, true) != 0) {
835 brelse(bp, 0);
836 goto fail;
838 mutex_enter(&ump->um_lock);
839 /* Try to keep snapshot indirect blocks contiguous. */
840 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
841 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
842 flags | B_METAONLY, &bap[0]);
843 if (pref == 0)
844 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
845 NULL);
846 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
847 flags | B_METAONLY, cred, &newb);
848 if (error) {
849 brelse(bp, 0);
850 goto fail;
852 nb = newb;
853 *allocblk++ = nb;
854 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
855 fs->fs_bsize, true, &nbp);
856 if (error) {
857 brelse(bp, 0);
858 goto fail;
861 * Write synchronously so that indirect blocks
862 * never point at garbage.
864 if ((error = bwrite(nbp)) != 0) {
865 brelse(bp, 0);
866 goto fail;
868 if (unwindidx < 0)
869 unwindidx = i - 1;
870 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
873 * If required, write synchronously, otherwise use
874 * delayed write.
877 if (flags & B_SYNC) {
878 bwrite(bp);
879 } else {
880 bdwrite(bp);
884 if (flags & B_METAONLY) {
885 KASSERT(bpp != NULL);
886 *bpp = bp;
887 return (0);
891 * Get the data block, allocating if necessary.
894 if (nb == 0) {
895 if (fscow_run(bp, true) != 0) {
896 brelse(bp, 0);
897 goto fail;
899 mutex_enter(&ump->um_lock);
900 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
901 &bap[0]);
902 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
903 &newb);
904 if (error) {
905 brelse(bp, 0);
906 goto fail;
908 nb = newb;
909 *allocblk++ = nb;
910 if (bpp != NULL) {
911 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
912 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
913 if (error) {
914 brelse(bp, 0);
915 goto fail;
918 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
919 if (allocib == NULL && unwindidx < 0) {
920 unwindidx = i - 1;
924 * If required, write synchronously, otherwise use
925 * delayed write.
928 if (flags & B_SYNC) {
929 bwrite(bp);
930 } else {
931 bdwrite(bp);
933 return (0);
935 brelse(bp, 0);
936 if (bpp != NULL) {
937 if (flags & B_CLRBUF) {
938 error = bread(vp, lbn, (int)fs->fs_bsize,
939 B_MODIFY, &nbp);
940 if (error) {
941 goto fail;
943 } else {
944 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
945 fs->fs_bsize, true, &nbp);
946 if (error)
947 goto fail;
949 *bpp = nbp;
951 return (0);
953 fail:
955 * If we have failed part way through block allocation, we
956 * have to deallocate any indirect blocks that we have allocated.
959 if (unwindidx >= 0) {
962 * First write out any buffers we've created to resolve their
963 * softdeps. This must be done in reverse order of creation
964 * so that we resolve the dependencies in one pass.
965 * Write the cylinder group buffers for these buffers too.
968 for (i = num; i >= unwindidx; i--) {
969 if (i == 0) {
970 break;
972 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
973 fs->fs_bsize, false, &bp) != 0)
974 continue;
975 if (bp->b_oflags & BO_DELWRI) {
976 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
977 FFS_DBTOFSB(fs, bp->b_blkno))));
978 bwrite(bp);
979 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
980 fs->fs_cgsize, false, &bp) != 0)
981 continue;
982 if (bp->b_oflags & BO_DELWRI) {
983 bwrite(bp);
984 } else {
985 brelse(bp, BC_INVAL);
987 } else {
988 brelse(bp, BC_INVAL);
993 * Now that any dependencies that we created have been
994 * resolved, we can undo the partial allocation.
997 if (unwindidx == 0) {
998 *allocib = 0;
999 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1000 } else {
1001 int r;
1003 r = bread(vp, indirs[unwindidx].in_lbn,
1004 (int)fs->fs_bsize, 0, &bp);
1005 if (r) {
1006 panic("Could not unwind indirect block, error %d", r);
1007 } else {
1008 bap = (int64_t *)bp->b_data;
1009 bap[indirs[unwindidx].in_off] = 0;
1010 bwrite(bp);
1013 for (i = unwindidx + 1; i <= num; i++) {
1014 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1015 fs->fs_bsize, false, &bp) == 0)
1016 brelse(bp, BC_INVAL);
1019 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1020 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1021 deallocated += fs->fs_bsize;
1023 if (deallocated) {
1024 #if defined(QUOTA) || defined(QUOTA2)
1026 * Restore user's disk quota because allocation failed.
1028 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1029 #endif
1030 ip->i_ffs2_blocks -= btodb(deallocated);
1031 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1034 return (error);